Compare commits

170 Commits

Author SHA1 Message Date
wuhuanzhou 587d99ae44
update compilation with C++14 (#31815)
4 years ago
tianshuo78520a b09c1ce09a
fix whl package push pypi (#31585)
4 years ago
Thunderbrook 393b3bd6b7
fix split core (#31892)
4 years ago
wuhuanzhou 3a95a0bc26
update cmake minimum version to 3.15 (#31807)
4 years ago
taixiurong 52b05baca3
fix some bugs in transformer training in xpu (#31918)
4 years ago
Wenyu 5394194e3a
support minus-int idx to LayerList (#31750)
4 years ago
furnace ef8323d49e
[ROCM] Add ROCm support for warpctc op (#31817)
4 years ago
Jiawei Wang 95f808c878
fix stack op grad nullptr (#31962)
4 years ago
liym27 57d4288ad4
[dynamic setitem] Fix bug of dynamic setitem: Decrease axes to do right broadcast (#31960)
4 years ago
石晓伟 0fa6c8a35c
fix a syntax error, test=develop (#31930)
4 years ago
Pei Yang 98e803e04f
map_matmul_to_mul_pass support 3dim (#31958)
4 years ago
wuhuanzhou a37a7f67e1
modify CI recommend information (#31395)
4 years ago
jakpiase 6dca7a1de7
Added int8 kernel for oneDNN LSTM op (#31894)
4 years ago
Pei Yang 14b7e3cf06
[Paddle-TRT] TRT inference support for BERT/Transformer in paddle 2.0 api (#31744)
4 years ago
Zhou Wei 245252b86e
fix bug when dtype of to_tensor is core.VarType (#31931)
4 years ago
Zhen Wang e1f931610e
Fix save/load error in imperative qat UT. (#31937)
4 years ago
Yiqun Liu e50bc2c2a6
Enhance cmake to support specifying CUDA_ARCH_NAME to Ampere. (#31923)
4 years ago
Zhou Wei 04a49b097e
[Custom OP]Remove old custom OP and reduce whl package volume (#31813)
4 years ago
wangguanzhong fe2848686b
add exclusive for test_conv2d_op, test=develop (#31936)
4 years ago
chajchaj 73a6fa3ed0
add deprecated for softmax_with_cross_entropy (#31722)
4 years ago
Shang Zhizhou 8084b7594b
fix batchnorm when input dims < 3 (#31933)
4 years ago
zlsh80826 64ee255ffd
[Paddle-TRT] yolobox (#31755)
4 years ago
Aurelius84 c4b60efabd
Fix segment Fault from set_value (#31891)
4 years ago
wuhuanzhou 17030ff28b
fix op benchmark ci error caused by missing test_pr branch, test=document_fix (#31920)
4 years ago
niuliling123 a71d72d921
relu forward and backward with vectortype (#31869)
4 years ago
tianshuo78520a 8829a309fe
Delete cudnn6 code (#31835)
4 years ago
wanghuancoder b48841ba2e
modify API nn.Bilinear's doc (#31889)
4 years ago
liym27 525c32e33c
Fix bug of set_value op: Decrease axes to do right broadcast (#31875)
4 years ago
ronnywang 123949eb48
[ROCM] added a cudnn switch of conv2d for rocm platform (#31836)
4 years ago
Shang Zhizhou 61805d8f0a
fix cmake model path (#31866)
4 years ago
Jiabin Yang 51eb29de18
[CustomOP] Add shape related constructor for Tensor (#31681)
4 years ago
zlsh80826 e3a38d790a
[Paddle-TRT] roi_align_plugin (#31732)
4 years ago
zlsh80826 bfb5cf5567
[Paddle-TRT] trt affine channel converter (#31628)
4 years ago
cc b47478efc2
[dygraph qat] Use layer to calculate output scale (#31861)
4 years ago
lilong12 c3974d0e2a
[3D-parallel] Reformat pipeline parallel (#31786)
4 years ago
zlsh80826 01aa252624
[Paddle-TRT] multiclass nms (#31742)
4 years ago
Wilber 70b67f1029
fix go api bug. (#31857)
4 years ago
tianshuo78520a e804f08559
delete include framework.pb.h (#31859)
4 years ago
Chengmo f58cb01864
【Paddle.Fleet】fix dataset zip py3 bug (#31441)
4 years ago
Kaipeng Deng bf09dcb346
add GPU tensor notice & update default_collate_fn/default_convert_fn. test=develop (#31763)
4 years ago
Chen Weihang 27f2d8df8e
Polish two error messages (#31852)
4 years ago
Zhou Wei 511e204e62
LRScheduler.get_lr should not update lr in LinearWarmup (#31843)
4 years ago
niuliling123 6472d62093
Revert "add relu forward kernel and backward kernel (#31613)" (#31853)
4 years ago
winter-wang e7f28d6c0d
fix runtime crash when rnn model inference, test=develop (#31833)
4 years ago
parap1uie-s 5d89ec36dc
Update pooling.py (#31829)
4 years ago
Huihuang Zheng 649868ffb2
[Dy2stat] Fix the bug that loop_body_func may return single element (#31806)
4 years ago
Wojciech Uss e5f7a834d4
fix cache key in concat oneDNN kernel (#31820)
4 years ago
Aurelius84 f2cfc0f46d
[CustomOp]Avoid raising warning while import paddle (#31804)
4 years ago
cc 84a551380e
[dygraph qat] Refine saving output scale to infer program (#31784)
4 years ago
Chen Weihang 68497e7b39
change trainable to stop_gradient in optimizer (#31823)
4 years ago
ronnywang 270699e647
[ROCM] fix test_matmul_v2_op (#31802)
4 years ago
Zhou Wei 1eb927f935
Restore the third-party library cache for windows (#31811)
4 years ago
Chen Weihang 3f66e7deab
add cmath header for bfloat (#31792)
4 years ago
Feiyu Chan 4046f1303a
add coalesce_tensor into white list when checking re-creation of parameters (#31800)
4 years ago
Zhou Wei a70de87d76
Update windows compiler and CI from VS2015 to VS2017 (#31652)
4 years ago
Wilber f4d9212de2
trt plugin upgrade to pluginv2ext (#31670)
4 years ago
niuliling123 372ac08a17
add relu forward kernel and backward kernel (#31613)
4 years ago
Wojciech Uss 814b38e30f
update scale collection and propagation algorithm (#31783)
4 years ago
tianshuo78520a 513641e153
Delete fast_check_nan_inf (#31788)
4 years ago
Shang Zhizhou 9d04ef7369
fix tensorrt output variable reshape (#31733)
4 years ago
Qi Li 46dd1d4aad
[ROCM] fix reduce_sum nan in ROCM platform, test=develop (#31780)
4 years ago
gongweibao f72d197ec5
fix launch ps ut test=develop (#31771)
4 years ago
Tao Luo 032de0bfd0
update approval (#31782)
4 years ago
zlsh80826 bfced39eb6
[Paddle-TRT] nearest_interp op (#31626)
4 years ago
arlesniak 7ccf6b6030
[oneDNN] Initial bf16 amp integration (#31093)
4 years ago
lilong12 a501a7b0ca
[3D-parallel] add 1f1b scheduler for pipeline (#31566)
4 years ago
guofei ed7956a816
Fix skip_quant in QAT (#31704)
4 years ago
ronnywang 8c19d7aa2f
[ROCM] fix test_conv2d_transpose_op (#31749)
4 years ago
Ouyang Chao a45c8ca69d
fix bug of DepthwiseConvTransposeGradKernel (#31762)
4 years ago
Jacek Czaja 25fc2a1fdb
[oneDNN] Added Elementwise Mul grad fp32/bf16 (#31647)
4 years ago
Chen Weihang 878e117b6d
[CustomOp] Support float16 in custom op (#31725)
4 years ago
ronnywang c9e1d9dc31
[ROCM] fix test_rnn_op (#31735)
4 years ago
zlsh80826 1c67cf0c98
run radix sort of proposals layer on context stream (#31631)
4 years ago
Chen Weihang e429deb0c4
[CustomOp] Support attribute in infershape function (#31713)
4 years ago
Adam Osewski a4a2b77def
[oneDNN] lookup_table op with support for BF16 data type. (#31558)
4 years ago
zlsh80826 c86e771e94
NMS Performance Optimization (#31634)
4 years ago
zlsh80826 50cafa0b0c
remove redundant sync, set collect/dist kernel to context stream, sub_lod memcpy opt (#31641)
4 years ago
cc 1d197f6c97
[dygraph qat] Refine calculating output scale of dygraph qat (#31710)
4 years ago
ronnywang 420527f0d9
[ROCM] fix layer_norm, norm, p_norm, test_sequence_softmax_op, test_math_op_patch_var_base (#31709)
4 years ago
Chen Weihang 87852616aa
[CustomOp] Support complex dtype in custom op (#31657)
4 years ago
zlsh80826 fe241fd02f
[Paddle-TRT] gather converter (#31640)
4 years ago
zlsh80826 4ea3427865
[Paddle-TRT] support batch axis concatenation when using dynamic shape (#31627)
4 years ago
Zhou Wei d4282ea97e
fix multi cuda environment bug (#31694)
4 years ago
Chengmo 09482ddec4
【Paddle.Fleet】Fix one ps gradient clip (#31664)
4 years ago
Kaipeng Deng 740359edaf
remove useless import (#31700)
4 years ago
Zhang Ting 7f50bb7ec1
support NHWC for temporal_shift op (#31642)
4 years ago
liym27 402288ad65
In __getitem__, convert integers to int64 Tensor, not int32, to be compatible with Lite (#31658)
4 years ago
Chen Weihang 2fbe9b097a
[CustomOp] Remove Eigen dependencies of float16 (#31669)
4 years ago
cc 19592d2b71
Refine dygraph qat, test=develop (#31680)
4 years ago
Zhou Wei 4c0c55bba1
support Geforce RTX 30+ GPU (#31529)
4 years ago
YUNSHEN XIE cdc5a55ac1
turn off added ut check on windows (#31660)
4 years ago
Qi Li d9b50f664f
[ROCM] update ci scripts and dockerfile, test=develop (#31551)
4 years ago
YUNSHEN XIE 1a6e3b04cd
Second optimization of retry method (#31646)
4 years ago
wuhuanzhou 41e9ecfd1f
Optimize compilation with Ninja (#31449)
4 years ago
yiak c1b1ccfbf5
Update tinyformat.h (#31612)
4 years ago
gongweibao 9c624b16d5
Extend unittest time of (#31570)
4 years ago
YUNSHEN XIE 580442ceba
fix wget with no proxy on windows (#31505)
4 years ago
ronnywang da10c5cf8b
[ROCM] fix softmax_with_cross_entropy_op, test=develop (#31629)
4 years ago
LielinJiang 75433126df
Fix summary bug when calculating output shape (#31549)
4 years ago
ShenLiang c3634c6b0a
fix amp bug of fleet (#31532)
4 years ago
Chen Weihang 027b574a0e
[CustomOp] Remove the dependence of the underlying data types on eigen (#31602)
4 years ago
WangXi 9066b74f58
c_gen_nccl_id add SocketServer to persist server (#31589)
4 years ago
Kaipeng Deng a32e8bf1e7
DataLoader support dict and str (#31481)
4 years ago
Chen Weihang 30a627aaf3
Normalized function parameter writing (#31588)
4 years ago
Pei Yang cac9635a67
[Paddle-TRT] Fix engine key in trt int8 calibration (#31513)
4 years ago
Shang Zhizhou 50ac7dbfd0
Trt elementwise plugin serialize (#31587)
4 years ago
guofei ef0dd3efed
Support loading parameters from checkpoint to save quantized model (#31419)
4 years ago
whs da9dda5c9b
Make CreateProgramDesc more robust (#31543)
4 years ago
hong 99dcd66508
try to fix imperative ocr unittest error; test=develop (#31568)
4 years ago
Qi Li 3d5aa9d10a
[ROCM] fix conv2d and conv3d op, test=develop (#31553)
4 years ago
YUNSHEN XIE f302bb4f8b
help timeout ut debug (#31500)
4 years ago
Chen Weihang 95cceb2dd7
[CustomOp] Support duplicable op input and output (#31535)
4 years ago
Aurelius84 def27bc801
[Dy2stat]Fix bug with static_convert_var_shape in locals scope (#31556)
4 years ago
YUNSHEN XIE 49c3d2a97b
modified show_ut_retry_result (#31528)
4 years ago
LielinJiang ac493f2c72
Update comments for API `RandomResizedCrop` (#31539)
4 years ago
lidanqing 0f1e7e3d52
[Bug fix] Different machine generate different binary file, remove md5 check (#31482)
4 years ago
jiangcheng 9ed6c895f1
optimize range op by place parameters on cpu rather than gpu, test=develop (#30811)
4 years ago
Thunderbrook 3789a69923
solve bug in heter mode (#31531)
4 years ago
chajchaj 6148b87f9d
add softmax_switch for softmax_with_cross_entropy_op, test=develop (#31428)
4 years ago
Aurelius84 f3959e9ddc
[save/load] Fix bug with input_spec=dict[InputSpec] in jit.save (#31517)
4 years ago
WangXi 83a2fb1f08
Add collective async wait op (#31463)
4 years ago
lilong12 0205e9f84e
remove the send/recv of tensor size (#31460)
4 years ago
Aurelius84 c8ae837d52
[CustomOp]Fix setup_install timeout (#31484)
4 years ago
furnace 910f377fa5
Bugfix rocm (#31490)
4 years ago
Qi Li 416e47edef
[ROCM] fix softmax with loss nan in HIP platform, test=develop (#31491)
4 years ago
Shang Zhizhou f57739be35
fix ernie_varlen when cutting head (#31497)
4 years ago
JamesLim 45c7d90564
Optimization of elementwise CUDA kernel (#30801)
4 years ago
YUNSHEN XIE 0b3c229606
Prec on mac (#31382)
4 years ago
Jacek Czaja 23d96cf221
[oneDNN] bumpup onednn 2.2 fixup version (#31473)
4 years ago
YUNSHEN XIE 390cebee15
Prec on windows exclude check_added_ut (#31372)
4 years ago
Zhou Wei 634a12b368
fix bug of windows Chinese msvc (#31493)
4 years ago
wangguanzhong 43d6abf0a5
update conv2d, test=develop (#31480)
4 years ago
wangguanzhong 50af0c2cbb
fix roi_align, test=develop (#31479)
4 years ago
ronnywang e03e46730c
[ROCM] fix gather_op, sigmoid_cross_entropy_with_logits_op, test=develop (#31467)
4 years ago
Qi Li b85c8e03be
[ROCM] fix reduce op, test=develop (#31478)
4 years ago
Jacek Czaja 39a5424ed1
[oneDNN] elementwise add bf16 grad kernel with broadcasting (#31385)
4 years ago
石晓伟 5f6213217b
update zero_copy_tensor_test.cc for build of gcc485, test=develop (#31470)
4 years ago
Qi Li 133a914bd0
[ROCM] fix test_dist_op ci test, test=develop (#31468)
4 years ago
Qi Li f9377965c4
[ROCM] fix dropout and remove hipcub, test=develop (#31455)
4 years ago
Aurelius84 fadabbe9b0
[CustomOp] Automatically specify PADDLE_WITH_MKLDNN & Remove Interpreter argument (#31391)
4 years ago
Leo Chen ffdd5b7773
Fix cmake of cryptopp to avoid downloading every time (#31447)
4 years ago
石晓伟 bc7632be73
upgrade inference tensor apis, test=develop (#31402)
4 years ago
JamesLim 8491ae9a02
Creating a CUDA function to find the minimum value in warp or block (#31191)
4 years ago
Pei Yang 30717a6cbc
fix trt serialization on windows (#31438)
4 years ago
Pei Yang 1321c47950
add more info in trt engine serialization (#31434)
4 years ago
liuyuhui 9ebf05b003
[Kunlun] Multi xpu dygraph performance optimization, add distributed.spawn support for multi xpu and some bug-fixes (#31130)
4 years ago
Qi Li 4d647ec137
[ROCM] update fluid platform for rocm (part5), test=develop (#31315)
4 years ago
liym27 522c91ec67
[Dy2Stat] Remove gast.Index for compatibility of gast 0.4.0 (#31358)
4 years ago
YUNSHEN XIE 62289fccc0
fix python full coverage decrease issue (#31429)
4 years ago
Wilber c9a7bfec89
prepare remove grad script and update PADDLE_CI_INFERENCE pipeline (#31149)
4 years ago
Zhang Ting 7d95e598c1
support float16 for temporal_shift op (#31432)
4 years ago
YUNSHEN XIE 3a8ef10e09
fix modified_retry_method_only_win (#31404)
4 years ago
Zhang Ting dcce54ea76
improve performance of depthwise_conv2d (#31099)
4 years ago
wuhuanzhou 4d6d2db812
Windows system supports Ninja compilation (#31161)
4 years ago
liym27 0fff930667
Fix bug for set_value op when input dtype is not float32 (#31411)
4 years ago
Huihuang Zheng c40b98e068
Fix comment (#31424)
4 years ago
Huihuang Zheng 6bf02a1261
[Dy2stat] Fix Read-Only Attribute as while_loop Output (#31415)
4 years ago
jakpiase 5b4f8aac82
Added LSTM BF16 and fixed GRU BF16 (#31234)
4 years ago
Qi Li 7cdf6ea770
[ROCM] update fluid elementwise op for rocm (part10), test=develop (#31361)
4 years ago
Qi Li 84639b6193
[ROCM] update fluid operators for rocm (part3), test=develop (#31213)
4 years ago
Qi Li 3b9db17199
[ROCM] update fluid operators for rocm (part7), test=develop (#31307)
4 years ago
Qi Li db50fb6766
[ROCM] fix softmax with loss and update python scripts, test=develop (#31373)
4 years ago
Pei Yang 32211fe9c4
TRT conv2d converter support SAME padding (#31379)
4 years ago
Qi Li e312a1ff6e
[ROCM] update fluid operators for rocm (part9), test=develop (#31338)
4 years ago
Qi Li 6626c6a6ad
fix bert cu file compiler error, test=develop (#31389)
4 years ago
wuhuanzhou c1bc223695
compile with VS2017, test=develop (#31388)
4 years ago
Zhou Wei 13e4280f82
[Custom OP]polish doc of custom OP (#31369)
4 years ago
Qi Li 946dbdae8c
[ROCM] update fluid operators for rocm (part6), test=develop (#31301)
4 years ago
wangna11BD 1cbccfa594
Add attrs `deformable_groups` for deformable_conv API (#31335)
4 years ago
Shang Zhizhou 77c44e2f1b
change prelu plugin to tensorRT layer (#30210)
4 years ago

@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License
cmake_minimum_required(VERSION 3.10)
cmake_minimum_required(VERSION 3.15)
cmake_policy(VERSION 3.10)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
@@ -38,11 +39,6 @@ endif()
if (WITH_GPU AND WITH_ASCEND)
message(FATAL_ERROR "Error when compile GPU and ASCEND at the same time")
endif()
# cmake 3.12, 3.13, 3.14 will append gcc link options to nvcc, and nvcc doesn't recognize them.
if(WITH_GPU AND (${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.12) AND (${CMAKE_VERSION} VERSION_LESS 3.15))
message(FATAL_ERROR "cmake ${CMAKE_VERSION} is not supported when WITH_GPU=ON because of bug https://cmake.org/pipermail/cmake/2018-September/068195.html. "
"You can use cmake 3.16 (recommended), 3.10, 3.11, 3.15 or 3.17. Please refer to the install document: https://cmake.org/install/")
endif()
if(WITH_GPU AND NOT APPLE)
enable_language(CUDA)
@@ -61,6 +57,7 @@ if(WITH_MUSL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy")
endif()
if(WIN32)
option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
@@ -72,6 +69,13 @@ if(WIN32)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj")
if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Zc:inline")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zc:inline")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Zc:inline")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zc:inline")
endif()
if (MSVC_STATIC_CRT)
message(STATUS "Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /MTd")
@@ -88,7 +92,7 @@ if(WIN32)
endif()
endforeach(flag_var)
endif()
# NOTE(Avin0323): Less parallel count results in faster compilation.
math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3")
# The windows build turns off warnings and uses parallel compiling.
@@ -116,6 +120,10 @@ if(WIN32)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838")
foreach(flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS)
set(${flag_var} "${${flag_var}} /ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221")
endforeach(flag_var)
if (WITH_WIN_DUMP_DBG)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Zi")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi")

@@ -74,7 +74,7 @@ endfunction()
# select_nvcc_arch_flags(out_variable)
function(select_nvcc_arch_flags out_variable)
# List of arch names
set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "All" "Manual")
set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "All" "Manual")
set(archs_name_default "Auto")
list(APPEND archs_names "Auto")
@@ -91,7 +91,7 @@ function(select_nvcc_arch_flags out_variable)
if(${CUDA_ARCH_NAME} STREQUAL "Manual")
set(CUDA_ARCH_BIN ${paddle_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
set(CUDA_ARCH_PTX "" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
else()
unset(CUDA_ARCH_BIN CACHE)
@@ -108,6 +108,8 @@ function(select_nvcc_arch_flags out_variable)
set(cuda_arch_bin "70")
elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
set(cuda_arch_bin "75")
elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
set(cuda_arch_bin "80")
elseif(${CUDA_ARCH_NAME} STREQUAL "All")
set(cuda_arch_bin ${paddle_known_gpu_archs})
elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
@@ -175,14 +177,22 @@ elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0) # CUDA 9.x
set(paddle_known_gpu_archs ${paddle_known_gpu_archs9})
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x
set(paddle_known_gpu_archs ${paddle_known_gpu_archs10})
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.x
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.2) # CUDA 11.0/11.1
set(paddle_known_gpu_archs ${paddle_known_gpu_archs11})
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.2+
set(paddle_known_gpu_archs "${paddle_known_gpu_archs11} 86")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
endif()
if (NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0)
@@ -198,14 +208,11 @@ select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}")
message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")
# Set C++11 support
# Set C++14 support
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.
if (NOT WIN32) # windows msvc2015 support c++11 natively.
# -std=c++11 -fPIC not recoginize by msvc, -Xcompiler will be added by cmake.
set(CMAKE_CUDA_STANDARD 11)
endif(NOT WIN32)
set(CMAKE_CUDA_STANDARD 14)
# (Note) For Windows, if /W[1-4] is deleted, /W1 will be added by default and conflict with -w,
# so replace /W[1-4] with /W0

@@ -94,7 +94,7 @@ macro(find_cudnn_version cudnn_header_file)
"${CUDNN_MAJOR_VERSION} * 1000 +
${CUDNN_MINOR_VERSION} * 100 + ${CUDNN_PATCHLEVEL_VERSION}")
message(STATUS "Current cuDNN header is ${cudnn_header_file} "
"Current cuDNN version is v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}. ")
"Current cuDNN version is v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCHLEVEL_VERSION}. ")
endif()
endif()
endmacro()
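For reference, the version integer assembled above packs the major, minor and patch levels into one number; below is a standalone C++ sketch of the same arithmetic, with illustrative values only:

```cpp
#include <cstdio>

// Same encoding as find_cudnn_version above: major * 1000 + minor * 100 + patch.
int main() {
  const int major = 7, minor = 6, patch = 5;               // illustrative values
  const int version = major * 1000 + minor * 100 + patch;  // 7605
  std::printf("cuDNN v%d.%d.%d -> %d\n", major, minor, patch, version);
  return 0;
}
```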

@@ -22,6 +22,13 @@ SET(CRYPTOPP_TAG CRYPTOPP_8_2_0)
IF(WIN32)
SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/cryptopp-static.lib" CACHE FILEPATH "cryptopp library." FORCE)
# The compilation parameter 'FI\"winapifamily.h\"' can't be used correctly
# with Ninja on Windows. The only difference between the patch file and the
# original file is that the compilation parameter is changed to 'FIwinapifamily.h'.
# This patch command can be removed when upgrading to a higher version.
if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
set(CRYPTOPP_PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different "${PADDLE_SOURCE_DIR}/patches/cryptopp/CMakeLists.txt" "<SOURCE_DIR>/")
endif()
ELSE(WIN32)
SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/libcryptopp.a" CACHE FILEPATH "cryptopp library." FORCE)
ENDIF(WIN32)
@@ -53,11 +60,13 @@ ExternalProject_Add(
"${CRYPTOPP_DOWNLOAD_CMD}"
PREFIX ${CRYPTOPP_PREFIX_DIR}
SOURCE_DIR ${CRYPTOPP_SOURCE_DIR}
UPDATE_COMMAND ""
PATCH_COMMAND
COMMAND ${CMAKE_COMMAND} -E remove_directory "<SOURCE_DIR>/cmake/"
COMMAND git clone ${GIT_URL}/noloader/cryptopp-cmake "<SOURCE_DIR>/cmake"
COMMAND cd "<SOURCE_DIR>/cmake" && git checkout tags/${CRYPTOPP_TAG} -b ${CRYPTOPP_TAG}
COMMAND ${CMAKE_COMMAND} -E copy_directory "<SOURCE_DIR>/cmake/" "<SOURCE_DIR>/"
COMMAND ${CRYPTOPP_PATCH_COMMAND}
INSTALL_DIR ${CRYPTOPP_INSTALL_DIR}
CMAKE_ARGS ${CRYPTOPP_CMAKE_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${CRYPTOPP_INSTALL_DIR}

@@ -20,7 +20,7 @@ SET(MKLDNN_SOURCE_DIR ${THIRD_PARTY_PATH}/mkldnn/src/extern_mkldnn)
SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
SET(MKLDNN_REPOSITORY ${GIT_URL}/oneapi-src/oneDNN.git)
SET(MKLDNN_TAG 3d53cd3f17ce7ca365c980f0e1e50359751ca038)
SET(MKLDNN_TAG 72efa005effb49595933e033cc732f215ef0445a)
# Introduce variables:
# * CMAKE_INSTALL_LIBDIR

@@ -250,5 +250,8 @@ IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE}
CACHE FILEPATH "protobuf executable." FORCE)
# `EXTERN_PROTOBUF_DEPEND` is used in the cmake function `proto_library` to
# ensure `protoc.exe` exists before calling it.
set(EXTERN_PROTOBUF_DEPEND extern_protobuf)
PROMPT_PROTOBUF_LIB(extern_protobuf)
ENDIF(NOT PROTOBUF_FOUND)

@@ -14,11 +14,15 @@
INCLUDE(ExternalProject)
IF(WITH_ROCM)
add_definitions(-DWARPCTC_WITH_HIP)
ENDIF()
SET(WARPCTC_PREFIX_DIR ${THIRD_PARTY_PATH}/warpctc)
SET(WARPCTC_SOURCE_DIR ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc)
SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git)
set(WARPCTC_TAG 95a461eddeabd51099ef059dcfada1117eb1bfb8)
set(WARPCTC_TAG c690fc5755abbdbdc98ef78d51ec10a6748a8cd1)
SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
CACHE PATH "Warp-ctc Directory" FORCE)
@@ -49,14 +53,15 @@ ExternalProject_Add(
BUILD_ALWAYS 1
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=$<FILTER:${CMAKE_C_FLAGS},EXCLUDE,/Zc:inline>
-DCMAKE_C_FLAGS_DEBUG=$<FILTER:${CMAKE_C_FLAGS_DEBUG},EXCLUDE,/Zc:inline>
-DCMAKE_C_FLAGS_RELEASE=$<FILTER:${CMAKE_C_FLAGS_RELEASE},EXCLUDE,/Zc:inline>
-DCMAKE_CXX_FLAGS=$<FILTER:${CMAKE_CXX_FLAGS},EXCLUDE,/Zc:inline>
-DCMAKE_CXX_FLAGS_RELEASE=$<FILTER:${CMAKE_CXX_FLAGS_RELEASE},EXCLUDE,/Zc:inline>
-DCMAKE_CXX_FLAGS_DEBUG=$<FILTER:${CMAKE_CXX_FLAGS_DEBUG},EXCLUDE,/Zc:inline>
-DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
-DWITH_GPU=${WITH_GPU}
-DWITH_ROCM=${WITH_ROCM}
-DWITH_OMP=${USE_OMP}
-DWITH_TORCH=OFF
-DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON

@@ -13,7 +13,7 @@ if(NOT XPU_SDK_ROOT)
elseif(WITH_SUNWAY)
SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
else()
SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_02_27.tar.gz" CACHE STRING "" FORCE)
SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_03_30.tar.gz" CACHE STRING "" FORCE)
endif()
SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")

@@ -4,10 +4,10 @@ include(CheckCCompilerFlag)
include(CheckCXXSymbolExists)
include(CheckTypeSize)
function(CheckCompilerCXX11Flag)
function(CheckCompilerCXX14Flag)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.")
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 5.4)
message(FATAL_ERROR "Unsupported GCC version. GCC >= 5.4 required.")
elseif(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.2)
message(WARNING "Found GCC ${CMAKE_CXX_COMPILER_VERSION} which is too high, recommended to use GCC 8.2")
endif()
@@ -20,23 +20,15 @@ function(CheckCompilerCXX11Flag)
message(FATAL_ERROR "Unsupported AppleClang version. AppleClang >= 5.1 required.")
endif()
else()
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.3)
message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.3 required.")
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.4)
message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.4 required.")
endif()
endif()
endif()
endfunction()
CheckCompilerCXX11Flag()
if (WITH_GPU)
if (${CMAKE_CUDA_COMPILER_VERSION} GREATER_EQUAL 11.0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
endif()
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
endif()
CheckCompilerCXX14Flag()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
# safe_set_flag
#
# Set a compile flag only if the compiler supports it

@@ -260,8 +260,8 @@ function(merge_static_libs TARGET_NAME)
# msvc will put the library in the directory "/Release/xxxlib" by default
# COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND cmake -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}"
COMMAND lib /OUT:${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/lib${TARGET_NAME}.lib ${libfiles}
COMMAND cmake -E make_directory $<TARGET_FILE_DIR:${TARGET_NAME}>
COMMAND lib /OUT:$<TARGET_FILE:${TARGET_NAME}> ${libfiles}
)
endif(WIN32)
endfunction(merge_static_libs)
@@ -492,7 +492,7 @@ function(nv_library TARGET_NAME)
message(FATAL "Please specify source file or library in nv_library.")
endif()
endif(nv_library_SRCS)
if (WIN32 AND ${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
if((CUDA_VERSION GREATER 9.2) AND (CUDA_VERSION LESS 11.0) AND (MSVC_VERSION LESS 1910))
set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
endif()
endif()
@@ -510,7 +510,7 @@ function(nv_binary TARGET_NAME)
add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
common_link(${TARGET_NAME})
endif()
if (WIN32 AND ${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
if((CUDA_VERSION GREATER 9.2) AND (CUDA_VERSION LESS 11.0) AND (MSVC_VERSION LESS 1910))
set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
endif()
endif()
@@ -537,7 +537,7 @@ function(nv_test TARGET_NAME)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
if (WIN32 AND ${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
if((CUDA_VERSION GREATER 9.2) AND (CUDA_VERSION LESS 11.0) AND (MSVC_VERSION LESS 1910))
set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
endif()
endif()
@@ -753,7 +753,8 @@ function(paddle_protobuf_generate_cpp SRCS HDRS)
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
-I${CMAKE_CURRENT_SOURCE_DIR}
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}" ${ABS_FIL}
DEPENDS ${ABS_FIL} protoc
# Set `EXTERN_PROTOBUF_DEPEND` only if `protoc.exe` needs to be compiled.
DEPENDS ${ABS_FIL} ${EXTERN_PROTOBUF_DEPEND}
COMMENT "Running C++ protocol buffer compiler on ${FIL}"
VERBATIM )
endforeach()
@@ -794,7 +795,8 @@ function(py_test TARGET_NAME)
if(WITH_COVERAGE AND NOT (WITH_INCREMENTAL_COVERAGE AND "$ENV{PADDLE_GIT_DIFF_PY_FILE}" STREQUAL ""))
add_test(NAME ${TARGET_NAME}
COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true ${py_test_ENVS}
FLAGS_cpu_deterministic=true
PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data
${PYTHON_EXECUTABLE} -m coverage run --branch -p ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

@@ -164,11 +164,11 @@ copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_INSTALL_DIR})
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
if(WIN32)
if(WITH_STATIC_LIB)
set(paddle_inference_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/libpaddle_inference.lib
${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_inference.*)
set(paddle_inference_lib $<TARGET_FILE_DIR:paddle_inference>/libpaddle_inference.lib
$<TARGET_FILE_DIR:paddle_inference>/paddle_inference.*)
else()
set(paddle_inference_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_inference.dll
${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_inference.lib)
set(paddle_inference_lib $<TARGET_FILE_DIR:paddle_inference_shared>/paddle_inference.dll
$<TARGET_FILE_DIR:paddle_inference_shared>/paddle_inference.lib)
endif()
copy(inference_lib_dist
SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib}
@@ -192,6 +192,15 @@ include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/extension/include/*
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex64.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex128.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/float16.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
# CAPI inference library for only inference
set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_install_dir" CACHE STRING
@@ -200,7 +209,7 @@ copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_C_INSTALL_DIR})
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
if(WIN32)
set(paddle_inference_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/${CMAKE_BUILD_TYPE}/paddle_inference_c.*)
set(paddle_inference_c_lib $<TARGET_FILE_DIR:paddle_inference_c>/paddle_inference_c.*)
else(WIN32)
set(paddle_inference_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_inference_c.*)
endif(WIN32)

@@ -18,6 +18,10 @@ if(NOT WIN32)
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG")
else()
# It is not used now; it can specify CUDA compile flags manually,
# and its purpose is to remove /Zi to reduce the GPU static library size. But it's
# dangerous, because NVIDIA may update CUDA and errors would then occur.
# Now, it's used for CUDA [10.0, 10.2].
set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props)
endif()

@@ -15,7 +15,7 @@
<Warning>InheritFromHost</Warning>
<BaseCommandLineTemplate>-ccbin "%(VCBinDir)" -x cu [GenerateRelocatableDeviceCode] [Include] [RequiredIncludes] [InterleaveSourceInPTX] [GPUDebugInfo] [GenerateLineInfo] [Keep] [KeepDir] [MaxRegCount] [PtxAsOptionV] [TargetMachinePlatform] [NvccCompilation] [CudaRuntime] [AdditionalOptions]</BaseCommandLineTemplate>
<BuildCommandLineTemplate>--use-local-env --cl-version $(CudaClVersion)</BuildCommandLineTemplate>
<BuildCommandLineTemplate>--use-local-env $(CudaClVersion)</BuildCommandLineTemplate>
<BuildDynamicCommandLineTemplate>[CodeGeneration]</BuildDynamicCommandLineTemplate>
<CleanCommandLineTemplate>-clean</CleanCommandLineTemplate>
<!-- <HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] $(CudaForceSynchronousPdbWrites) /Zi [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate> -->

@@ -222,7 +222,7 @@ if(WITH_MKLDNN)
endif()
include(external/protobuf) # find first, then download, build, install protobuf
if(NOT PROTOBUF_FOUND OR WIN32)
if(TARGET extern_protobuf)
list(APPEND third_party_deps extern_protobuf)
endif()
@@ -317,6 +317,7 @@ endif (WITH_LITE)
if (WITH_CRYPTO)
include(external/cryptopp) # download, build, install cryptopp
list(APPEND third_party_deps extern_cryptopp)
add_definitions(-DPADDLE_WITH_CRYPTO)
endif (WITH_CRYPTO)

@@ -50,6 +50,7 @@ output_data := value.Interface().([][]float32)
Run
```bash
go mod init github.com/paddlepaddle
export LD_LIBRARY_PATH=`pwd`/paddle_c/paddle/lib:$LD_LIBRARY_PATH
go run ./demo/mobilenet.go
```

@@ -13,7 +13,7 @@
// limitations under the License.
package main
import "../paddle"
import "github.com/paddlepaddle/paddle"
import "strings"
import "io/ioutil"
import "strconv"

@@ -15,7 +15,7 @@
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <paddle_c_api.h>
import "C"

@@ -15,7 +15,7 @@
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <stdlib.h>
// #include <paddle_c_api.h>

@@ -15,7 +15,7 @@
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include "paddle_c_api.h"
import "C"
@@ -88,7 +88,7 @@ func (predictor *Predictor) GetInputNames() []string {
}
func (predictor *Predictor) GetOutputNames() []string {
names := make([]string, predictor.GetInputNum())
names := make([]string, predictor.GetOutputNum())
for i := 0; i < len(names); i++ {
names[i] = predictor.GetOutputName(i)
}

@@ -15,7 +15,7 @@
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <stdlib.h>
// #include <string.h>
@@ -209,7 +209,7 @@ func DecodeTensor(r *bytes.Reader, shape []int32, t reflect.Type, ptr reflect.Va
value := reflect.Indirect(ptr)
value.Set(reflect.MakeSlice(t, int(shape[0]), int(shape[0])))
if len(shape) == 1 && value.Len() > 0 {
switch value.Index(1).Kind() {
switch value.Index(0).Kind() {
case reflect.Uint8, reflect.Int32, reflect.Int64, reflect.Float32:
binary.Read(r, Endian(), value.Interface())
return

@@ -47,6 +47,22 @@ namespace paddle {
} \
}()
#define PD_DISPATCH_FLOATING_AND_HALF_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT16, paddle::float16, \
__VA_ARGS__) \
default: \
PD_THROW("function " #NAME " is not implemented for data type `", \
::paddle::ToString(__dtype__), "`"); \
} \
}()
///////// Integral Dispatch Macro ///////////
#define PD_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...) \
@@ -68,6 +84,22 @@ namespace paddle {
} \
}()
///////// Complex Dispatch Macro ///////////
#define PD_DISPATCH_COMPLEX_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX64, \
::paddle::complex64, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX128, \
::paddle::complex128, __VA_ARGS__) \
default: \
PD_THROW("function " #NAME " is not implemented for data type `" + \
::paddle::ToString(__dtype__) + "`"); \
} \
}()
///////// Floating and Integral Dispatch Macro ///////////
#define PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES(TYPE, NAME, ...) \
@@ -93,6 +125,55 @@ namespace paddle {
} \
}()
///////// Floating and Complex Dispatch Macro ///////////
#define PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX64, \
::paddle::complex64, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX128, \
::paddle::complex128, __VA_ARGS__) \
default: \
PD_THROW("function " #NAME " is not implemented for data type `" + \
::paddle::ToString(__dtype__) + "`"); \
} \
}()
///////// Floating, Integral and Complex Dispatch Macro ///////////
#define PD_DISPATCH_FLOATING_AND_INTEGRAL_AND_COMPLEX_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT32, int, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT64, int64_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT8, int8_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::UINT8, uint8_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT16, int16_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX64, \
::paddle::complex64, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX128, \
::paddle::complex128, __VA_ARGS__) \
default: \
PD_THROW("function " #NAME " is not implemented for data type `" + \
::paddle::ToString(__dtype__) + "`"); \
} \
}()
// TODO(chenweihang): Add more Macros in the future if needed
} // namespace paddle
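As a usage sketch (not part of this diff), a custom-op kernel dispatched through the new PD_DISPATCH_FLOATING_AND_HALF_TYPES macro could look as follows. The header names are assumptions, and data_t is the per-case type alias that PD_PRIVATE_CASE_TYPE is assumed to bind, as in the existing floating-point dispatch:

```cpp
#include "ext_dispatch.h"  // assumed header name for the macros above
#include "ext_tensor.h"    // assumed header name for paddle::Tensor

// Hypothetical CPU relu kernel covering float, double and float16.
paddle::Tensor Relu(const paddle::Tensor& x) {
  paddle::Tensor out(paddle::PlaceType::kCPU, x.shape());
  PD_DISPATCH_FLOATING_AND_HALF_TYPES(x.type(), "Relu", ([&] {
    const data_t* in = x.data<data_t>();  // data_t is bound inside the macro
    data_t* o = out.mutable_data<data_t>();
    for (int64_t i = 0; i < x.size(); ++i) {
      o[i] = in[i] > static_cast<data_t>(0) ? in[i] : static_cast<data_t>(0);
    }
  }));
  return out;
}
```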

@@ -16,10 +16,17 @@ limitations under the License. */
#include <cstdint>
#include <string>
#include "complex128.h" // NOLINT
#include "complex64.h" // NOLINT
#include "ext_exception.h" // NOLINT
#include "float16.h" // NOLINT
namespace paddle {
using complex64 = paddle::platform::complex64;
using complex128 = paddle::platform::complex128;
using float16 = paddle::platform::float16;
enum class DataType {
BOOL,
INT8,
@@ -27,8 +34,11 @@ enum class DataType {
INT16,
INT32,
INT64,
FLOAT16,
FLOAT32,
FLOAT64,
COMPLEX64,
COMPLEX128,
// TODO(JiabinYang) support more data types if needed.
};
@@ -46,24 +56,33 @@ inline std::string ToString(DataType dtype) {
return "int32_t";
case DataType::INT64:
return "int64_t";
case DataType::FLOAT16:
return "float16";
case DataType::FLOAT32:
return "float";
case DataType::FLOAT64:
return "double";
case DataType::COMPLEX64:
return "complex64";
case DataType::COMPLEX128:
return "complex128";
default:
PD_THROW("Unsupported paddle enum data type.");
}
}
#define PD_FOR_EACH_DATA_TYPE(_) \
_(bool, DataType::BOOL) \
_(int8_t, DataType::INT8) \
_(uint8_t, DataType::UINT8) \
_(int16_t, DataType::INT16) \
_(int, DataType::INT32) \
_(int64_t, DataType::INT64) \
_(float, DataType::FLOAT32) \
_(double, DataType::FLOAT64)
#define PD_FOR_EACH_DATA_TYPE(_) \
_(bool, DataType::BOOL) \
_(int8_t, DataType::INT8) \
_(uint8_t, DataType::UINT8) \
_(int16_t, DataType::INT16) \
_(int, DataType::INT32) \
_(int64_t, DataType::INT64) \
_(float16, DataType::FLOAT16) \
_(float, DataType::FLOAT32) \
_(double, DataType::FLOAT64) \
_(complex64, DataType::COMPLEX64) \
_(complex128, DataType::COMPLEX128)
template <paddle::DataType T>
struct DataTypeToCPPType;
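A sketch of one way the extended PD_FOR_EACH_DATA_TYPE list can be consumed: the SizeOf helper below is hypothetical (not part of this diff), expands one switch case per supported dtype, including the new float16/complex64/complex128 entries, and assumes PD_THROW from ext_exception.h:

```cpp
#include <cstddef>

#include "ext_dtype.h"      // assumed header name for DataType above
#include "ext_exception.h"  // assumed header name for PD_THROW

namespace paddle {

// Hypothetical helper: one case per entry in PD_FOR_EACH_DATA_TYPE.
inline size_t SizeOf(DataType dtype) {
  switch (dtype) {
#define PD_SIZEOF_CASE(cpp_type, data_type) \
  case data_type:                           \
    return sizeof(cpp_type);
    PD_FOR_EACH_DATA_TYPE(PD_SIZEOF_CASE)
#undef PD_SIZEOF_CASE
    default:
      PD_THROW("Unsupported paddle enum data type.");
  }
}

}  // namespace paddle
```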

@@ -102,7 +102,7 @@ class ErrorMessage {
do { \
auto __message__ = ::paddle::ErrorMessage(__VA_ARGS__).to_string(); \
throw ::paddle::PD_Exception(__message__, __FILE__, __LINE__, \
"An error occured."); \
"An error occurred."); \
} while (0)
} // namespace paddle

File diff suppressed because it is too large.

@@ -52,6 +52,9 @@ class PD_DLL_DECL Tensor {
/// \brief Construct a Tensor on target Place for CustomOp.
/// Generally it's only used for users to create a Tensor.
explicit Tensor(const PlaceType& place);
/// \brief Construct a Tensor on target Place with shape for CustomOp.
/// Generally it's only used for users to create a Tensor.
Tensor(const PlaceType& place, const std::vector<int64_t>& shape);
/// \brief Reset the shape of the tensor.
/// Generally it's only used for the input tensor.
/// Reshape must be called before calling
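A short usage sketch of the shape-aware constructor added in #31681; PlaceType::kCPU, mutable_data<T>() and size() are taken from the existing custom-op Tensor API, and the header name is an assumption:

```cpp
#include <cstdint>

#include "ext_tensor.h"  // assumed header name for the Tensor class above

void Example() {
  // Place and shape in one step, instead of Tensor(place) followed by a reshape.
  paddle::Tensor t(paddle::PlaceType::kCPU, {2, 3});
  float* data = t.mutable_data<float>();  // allocates 2 * 3 floats on CPU
  for (int64_t i = 0; i < t.size(); ++i) {
    data[i] = 0.0f;
  }
}
```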

Some files were not shown because too many files have changed in this diff.
