You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/paddle/fluid/framework/details/CMakeLists.txt

146 lines
8.9 KiB

7 years ago
cc_library(var_handle SRCS var_handle.cc DEPS place framework_proto node)
7 years ago
cc_library(op_handle_base SRCS op_handle_base.cc DEPS var_handle device_context lod_tensor)
7 years ago
cc_library(scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory)
cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory)
cc_library(fetch_async_op_handle SRCS fetch_async_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory)
cc_library(share_tensor_buffer_functor SRCS share_tensor_buffer_functor.cc DEPS framework_proto scope place operator op_registry)
7 years ago
cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry)
cc_library(share_tensor_buffer_op_handle SRCS share_tensor_buffer_op_handle.cc DEPS op_handle_base scope computation_op_handle share_tensor_buffer_functor)
cc_library(rpc_op_handle SRCS rpc_op_handle.cc DEPS framework_proto scope place operator op_registry)
cc_library(fetch_barrier_op_handle SRCS fetch_barrier_op_handle.cc DEPS framework_proto scope place operator op_registry)
cc_library(multi_devices_helper SRCS multi_devices_helper.cc DEPS graph graph_helper)
cc_library(variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows)
if(WITH_PSCORE)
set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
set_source_files_properties(reduce_op_handle.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(threaded_ssa_graph_executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(async_ssa_graph_executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
endif()
7 years ago
if(WITH_GPU)
nv_library(nan_inf_utils SRCS nan_inf_utils_detail.cc nan_inf_utils_detail.cu DEPS framework_proto scope place)
7 years ago
nv_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda variable_visitor)
nv_library(fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda variable_visitor place device_memory_aligment)
nv_library(grad_merge_all_reduce_op_handle SRCS grad_merge_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor
ddim memory dynload_cuda variable_visitor place device_memory_aligment all_reduce_op_handle fused_all_reduce_op_handle)
if(WITH_DGC)
nv_library(sparse_all_reduce_op_handle SRCS sparse_all_reduce_op_handle.cc DEPS op_handle_base scope
lod_tensor ddim memory dynload_cuda variable_visitor dgc all_reduce_op_handle)
endif()
if(WITH_DISTRIBUTE)
nv_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
ddim dynload_cuda selected_rows_functor)
else()
nv_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
ddim dynload_cuda selected_rows_functor)
endif()
nv_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda)
nv_library(fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle)
elseif(WITH_ROCM)
hip_library(nan_inf_utils SRCS nan_inf_utils_detail.cc nan_inf_utils_detail.cu DEPS framework_proto scope place)
hip_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda variable_visitor)
hip_library(fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda variable_visitor place device_memory_aligment)
hip_library(grad_merge_all_reduce_op_handle SRCS grad_merge_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor
ddim memory dynload_cuda variable_visitor place device_memory_aligment all_reduce_op_handle fused_all_reduce_op_handle)
if(WITH_DISTRIBUTE)
hip_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
ddim dynload_cuda selected_rows_functor)
else()
hip_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
ddim dynload_cuda selected_rows_functor)
endif()
hip_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda)
hip_library(fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle)
7 years ago
else()
cc_library(nan_inf_utils SRCS nan_inf_utils_detail.cc DEPS framework_proto scope place)
cc_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
7 years ago
variable_visitor)
cc_library(fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
variable_visitor place device_memory_aligment)
cc_library(grad_merge_all_reduce_op_handle SRCS grad_merge_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor
ddim memory variable_visitor place device_memory_aligment all_reduce_op_handle fused_all_reduce_op_handle)
if(WITH_DISTRIBUTE)
cc_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
ddim selected_rows_functor)
else()
cc_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
ddim selected_rows_functor)
endif()
cc_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor)
cc_library(fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle)
7 years ago
endif()
cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor)
cc_library(eager_deletion_op_handle SRCS eager_deletion_op_handle.cc DEPS lod_tensor selected_rows reference_count_pass_helper)
Enable the detection of subgraph composed of grad ops (#21223) * Add the first implememtation of fusion_group op #19621 (#3) * Add the dynamic load of nvrtc, and support runtime compiling of CUDA kernel using nvrtc. test=develop * Call CUDA driver api to launch the kernel compiled by nvrtc. test=develop * Disable for mac and windows. test=develop * Refine the codes to support manually specified num_threads and workload_per_thread. test=develop * Refine the CUDA kernel to support large dims. test=develop * Add DeviceCodePool to manage all device codes. * Add the first implementation fusion_group op. * Add unit-test for fusion_group op. * Add the check of result. * Add the check of nvrtc in unit-test. test=develop * Add comment to explain the inputs, outputs and features of fusion_group op. test=develop * Disable fusion_group op for mac and windows. test=develop * Make the compiling of device code return status instead of hanging up. test=develop * Add the check of whether there is CUDA driver library, and do not core dump when failing to call the CUDA driver API. * Unify fusion_group_op's input and output names. test=develop * Add the check of CUDA driver library in unittest. test=develop * Enable generating code for a given subgraph. #21126 (#4) * Enable generating code for a given subgraph. * Support sorting the subgraph. * Remove the rearange of expressions because we use the sorted subgraph directly. * Enable generating code for a subgraph which is composed of grad ops. * Use expression information to check the accuracy in unittest. * Separate load and store from computation expressions. test=develop * Improve the loading statements in generated codes. test=develop * Remove unused arguments from formal list. test=develop * Enable the detection of subgraph of grad ops. * Generate code for detected subgraph in fusion_group_pass. * Add an option in BuildStrategy to enable fusion_group_pass and add unittest. test=develop * Fix a bug when checking whether the shape of all inputs are the same. * Add debug information. * Remove subgraph_detector from inference/analysis to the common framework/ir directory. (#5) test=develop * Call subgraph_detector in fusion_group pass. test=develop * Disable fusion_group when WITH_GPU is OFF. test=develop * Refine all PADDLE_ENFORCE message. test=develop * Fix the case that some inputs are not defined in grad ops, and set op_role for fused op. test=develop * Follow review comments. test=develop
5 years ago
set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto
multi_devices_helper
Enable the detection of subgraph composed of grad ops (#21223) * Add the first implememtation of fusion_group op #19621 (#3) * Add the dynamic load of nvrtc, and support runtime compiling of CUDA kernel using nvrtc. test=develop * Call CUDA driver api to launch the kernel compiled by nvrtc. test=develop * Disable for mac and windows. test=develop * Refine the codes to support manually specified num_threads and workload_per_thread. test=develop * Refine the CUDA kernel to support large dims. test=develop * Add DeviceCodePool to manage all device codes. * Add the first implementation fusion_group op. * Add unit-test for fusion_group op. * Add the check of result. * Add the check of nvrtc in unit-test. test=develop * Add comment to explain the inputs, outputs and features of fusion_group op. test=develop * Disable fusion_group op for mac and windows. test=develop * Make the compiling of device code return status instead of hanging up. test=develop * Add the check of whether there is CUDA driver library, and do not core dump when failing to call the CUDA driver API. * Unify fusion_group_op's input and output names. test=develop * Add the check of CUDA driver library in unittest. test=develop * Enable generating code for a given subgraph. #21126 (#4) * Enable generating code for a given subgraph. * Support sorting the subgraph. * Remove the rearange of expressions because we use the sorted subgraph directly. * Enable generating code for a subgraph which is composed of grad ops. * Use expression information to check the accuracy in unittest. * Separate load and store from computation expressions. test=develop * Improve the loading statements in generated codes. test=develop * Remove unused arguments from formal list. test=develop * Enable the detection of subgraph of grad ops. * Generate code for detected subgraph in fusion_group_pass. * Add an option in BuildStrategy to enable fusion_group_pass and add unittest. test=develop * Fix a bug when checking whether the shape of all inputs are the same. * Add debug information. * Remove subgraph_detector from inference/analysis to the common framework/ir directory. (#5) test=develop * Call subgraph_detector in fusion_group pass. test=develop * Disable fusion_group when WITH_GPU is OFF. test=develop * Refine all PADDLE_ENFORCE message. test=develop * Fix the case that some inputs are not defined in grad ops, and set op_role for fused op. test=develop * Follow review comments. test=develop
5 years ago
sequential_execution_pass
modify_op_lock_and_record_event_pass
all_reduce_deps_pass
reference_count_pass
eager_deletion_pass
buffer_shared_inplace_op_pass
buffer_shared_cross_op_memory_reuse_pass
inplace_addto_op_pass
set_reader_device_info_utils
add_reader_dependency_pass)
6 years ago
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ${SSA_GRAPH_EXECUTOR_DEPS})
cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
simple_threadpool device_context)
cc_library(parallel_ssa_graph_executor SRCS parallel_ssa_graph_executor.cc DEPS threaded_ssa_graph_executor)
set(ASYNC_SSA_GRAPH_EXECUTOR_DEPS threaded_ssa_graph_executor)
cc_library(async_ssa_graph_executor SRCS async_ssa_graph_executor.cc DEPS ${ASYNC_SSA_GRAPH_EXECUTOR_DEPS})
6 years ago
7 years ago
cc_test(broadcast_op_test SRCS broadcast_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
device_context broadcast_op_handle)
7 years ago
cc_test(gather_op_test SRCS gather_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
device_context gather_op_handle)
cc_library(scope_buffered_monitor SRCS scope_buffered_monitor.cc DEPS scope profiler selected_rows)
cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_executor.cc DEPS ssa_graph_executor scope_buffered_monitor)
#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
# device_context reduce_op_handle )
cc_library(bind_threaded_ssa_graph_executor SRCS bind_threaded_ssa_graph_executor.cc
DEPS fetch_op_handle gflags ssa_graph_executor scope simple_threadpool device_context)
7 years ago
cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc
DEPS fetch_async_op_handle ssa_graph_executor scope simple_threadpool device_context)
cc_test(fused_broadcast_op_test SRCS fused_broadcast_op_handle_test.cc DEPS fused_broadcast_op_handle)
cc_test(exception_holder_test SRCS exception_holder_test.cc )
Enable the detection of subgraph composed of grad ops (#21223) * Add the first implememtation of fusion_group op #19621 (#3) * Add the dynamic load of nvrtc, and support runtime compiling of CUDA kernel using nvrtc. test=develop * Call CUDA driver api to launch the kernel compiled by nvrtc. test=develop * Disable for mac and windows. test=develop * Refine the codes to support manually specified num_threads and workload_per_thread. test=develop * Refine the CUDA kernel to support large dims. test=develop * Add DeviceCodePool to manage all device codes. * Add the first implementation fusion_group op. * Add unit-test for fusion_group op. * Add the check of result. * Add the check of nvrtc in unit-test. test=develop * Add comment to explain the inputs, outputs and features of fusion_group op. test=develop * Disable fusion_group op for mac and windows. test=develop * Make the compiling of device code return status instead of hanging up. test=develop * Add the check of whether there is CUDA driver library, and do not core dump when failing to call the CUDA driver API. * Unify fusion_group_op's input and output names. test=develop * Add the check of CUDA driver library in unittest. test=develop * Enable generating code for a given subgraph. #21126 (#4) * Enable generating code for a given subgraph. * Support sorting the subgraph. * Remove the rearange of expressions because we use the sorted subgraph directly. * Enable generating code for a subgraph which is composed of grad ops. * Use expression information to check the accuracy in unittest. * Separate load and store from computation expressions. test=develop * Improve the loading statements in generated codes. test=develop * Remove unused arguments from formal list. test=develop * Enable the detection of subgraph of grad ops. * Generate code for detected subgraph in fusion_group_pass. * Add an option in BuildStrategy to enable fusion_group_pass and add unittest. test=develop * Fix a bug when checking whether the shape of all inputs are the same. * Add debug information. * Remove subgraph_detector from inference/analysis to the common framework/ir directory. (#5) test=develop * Call subgraph_detector in fusion_group pass. test=develop * Disable fusion_group when WITH_GPU is OFF. test=develop * Refine all PADDLE_ENFORCE message. test=develop * Fix the case that some inputs are not defined in grad ops, and set op_role for fused op. test=develop * Follow review comments. test=develop
5 years ago
set(IR_PASS_DEPS graph_viz_pass multi_devices_graph_pass
multi_devices_graph_print_pass multi_devices_graph_check_pass
fuse_elewise_add_act_pass fuse_bn_act_pass fuse_bn_add_act_pass
Enable the detection of subgraph composed of grad ops (#21223) * Add the first implememtation of fusion_group op #19621 (#3) * Add the dynamic load of nvrtc, and support runtime compiling of CUDA kernel using nvrtc. test=develop * Call CUDA driver api to launch the kernel compiled by nvrtc. test=develop * Disable for mac and windows. test=develop * Refine the codes to support manually specified num_threads and workload_per_thread. test=develop * Refine the CUDA kernel to support large dims. test=develop * Add DeviceCodePool to manage all device codes. * Add the first implementation fusion_group op. * Add unit-test for fusion_group op. * Add the check of result. * Add the check of nvrtc in unit-test. test=develop * Add comment to explain the inputs, outputs and features of fusion_group op. test=develop * Disable fusion_group op for mac and windows. test=develop * Make the compiling of device code return status instead of hanging up. test=develop * Add the check of whether there is CUDA driver library, and do not core dump when failing to call the CUDA driver API. * Unify fusion_group_op's input and output names. test=develop * Add the check of CUDA driver library in unittest. test=develop * Enable generating code for a given subgraph. #21126 (#4) * Enable generating code for a given subgraph. * Support sorting the subgraph. * Remove the rearange of expressions because we use the sorted subgraph directly. * Enable generating code for a subgraph which is composed of grad ops. * Use expression information to check the accuracy in unittest. * Separate load and store from computation expressions. test=develop * Improve the loading statements in generated codes. test=develop * Remove unused arguments from formal list. test=develop * Enable the detection of subgraph of grad ops. * Generate code for detected subgraph in fusion_group_pass. * Add an option in BuildStrategy to enable fusion_group_pass and add unittest. test=develop * Fix a bug when checking whether the shape of all inputs are the same. * Add debug information. * Remove subgraph_detector from inference/analysis to the common framework/ir directory. (#5) test=develop * Call subgraph_detector in fusion_group pass. test=develop * Disable fusion_group when WITH_GPU is OFF. test=develop * Refine all PADDLE_ENFORCE message. test=develop * Fix the case that some inputs are not defined in grad ops, and set op_role for fused op. test=develop * Follow review comments. test=develop
5 years ago
multi_batch_merge_pass
fuse_relu_depthwise_conv_pass
lock_free_optimize_pass
coalesce_grad_tensor_pass fuse_all_reduce_op_pass backward_optimizer_op_deps_pass
fuse_adam_op_pass fuse_sgd_op_pass fuse_momentum_op_pass
sync_batch_norm_pass runtime_context_cache_pass)
if(NOT APPLE AND NOT WIN32 AND (WITH_GPU OR WITH_ROCM))
Enable the detection of subgraph composed of grad ops (#21223) * Add the first implememtation of fusion_group op #19621 (#3) * Add the dynamic load of nvrtc, and support runtime compiling of CUDA kernel using nvrtc. test=develop * Call CUDA driver api to launch the kernel compiled by nvrtc. test=develop * Disable for mac and windows. test=develop * Refine the codes to support manually specified num_threads and workload_per_thread. test=develop * Refine the CUDA kernel to support large dims. test=develop * Add DeviceCodePool to manage all device codes. * Add the first implementation fusion_group op. * Add unit-test for fusion_group op. * Add the check of result. * Add the check of nvrtc in unit-test. test=develop * Add comment to explain the inputs, outputs and features of fusion_group op. test=develop * Disable fusion_group op for mac and windows. test=develop * Make the compiling of device code return status instead of hanging up. test=develop * Add the check of whether there is CUDA driver library, and do not core dump when failing to call the CUDA driver API. * Unify fusion_group_op's input and output names. test=develop * Add the check of CUDA driver library in unittest. test=develop * Enable generating code for a given subgraph. #21126 (#4) * Enable generating code for a given subgraph. * Support sorting the subgraph. * Remove the rearange of expressions because we use the sorted subgraph directly. * Enable generating code for a subgraph which is composed of grad ops. * Use expression information to check the accuracy in unittest. * Separate load and store from computation expressions. test=develop * Improve the loading statements in generated codes. test=develop * Remove unused arguments from formal list. test=develop * Enable the detection of subgraph of grad ops. * Generate code for detected subgraph in fusion_group_pass. * Add an option in BuildStrategy to enable fusion_group_pass and add unittest. test=develop * Fix a bug when checking whether the shape of all inputs are the same. * Add debug information. * Remove subgraph_detector from inference/analysis to the common framework/ir directory. (#5) test=develop * Call subgraph_detector in fusion_group pass. test=develop * Disable fusion_group when WITH_GPU is OFF. test=develop * Refine all PADDLE_ENFORCE message. test=develop * Fix the case that some inputs are not defined in grad ops, and set op_role for fused op. test=develop * Follow review comments. test=develop
5 years ago
set(IR_PASS_DEPS ${IR_PASS_DEPS} fusion_group_pass)
endif()
cc_library(build_strategy SRCS build_strategy.cc DEPS pass_builder ${IR_PASS_DEPS})
if (WITH_MKLDNN)
target_link_libraries(build_strategy mkldnn_placement_pass)
endif()