diff --git a/cmake/external/ascend.cmake b/cmake/external/ascend.cmake index e3279a1e20..bddd2023b4 100644 --- a/cmake/external/ascend.cmake +++ b/cmake/external/ascend.cmake @@ -57,28 +57,24 @@ if(WITH_ASCEND) SET_PROPERTY(TARGET atlas_acl PROPERTY IMPORTED_LOCATION ${atlas_acl_lib}) add_custom_target(extern_ascend DEPENDS ascend_ge ascend_graph atlas_acl) +endif() -elseif(WITH_ASCEND_CL) - set(ASCEND_ATC_DIR ${ASCEND_DIR}/atc/lib64) - set(ASCEND_ACL_DIR ${ASCEND_DIR}/acllib/lib64) - set(STATIC_ACL_LIB ${ASCEND_ACL_DIR}) +if(WITH_ASCEND_CL) + set(ASCEND_CL_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64) - set(ATLAS_ACL_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64) - set(ATLAS_ATC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64) - - set(atlas_acl_lib ${ATLAS_ACL_DIR}/libascendcl.so) - set(atlas_acl_op_compiler_lib ${ATLAS_ACL_DIR}/libacl_op_compiler.so) - set(ATLAS_ACL_INC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include) + set(ascendcl_lib ${ASCEND_CL_DIR}/libascendcl.so) + set(acl_op_compiler_lib ${ASCEND_CL_DIR}/libacl_op_compiler.so) + set(ASCEND_CL_INC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include) - message(STATUS "ATLAS_ACL_INC_DIR ${ATLAS_ACL_INC_DIR}") - message(STATUS "ATLAS_ACL_LIB_DIR ${ATLAS_ACL_DIR}") - INCLUDE_DIRECTORIES(${ATLAS_ACL_INC_DIR}) + message(STATUS "ASCEND_CL_INC_DIR ${ASCEND_CL_INC_DIR}") + message(STATUS "ASCEND_CL_DIR ${ASCEND_CL_DIR}") + INCLUDE_DIRECTORIES(${ASCEND_CL_INC_DIR}) - ADD_LIBRARY(atlas_acl SHARED IMPORTED GLOBAL) - SET_PROPERTY(TARGET atlas_acl PROPERTY IMPORTED_LOCATION ${atlas_acl_lib}) + ADD_LIBRARY(ascendcl SHARED IMPORTED GLOBAL) + SET_PROPERTY(TARGET ascendcl PROPERTY IMPORTED_LOCATION ${ascendcl_lib}) - ADD_LIBRARY(atlas_acl_op_compiler SHARED IMPORTED GLOBAL) - SET_PROPERTY(TARGET atlas_acl_op_compiler PROPERTY IMPORTED_LOCATION ${atlas_acl_op_compiler_lib}) - add_custom_target(extern_ascend DEPENDS atlas_acl atlas_acl_op_compiler) + ADD_LIBRARY(acl_op_compiler SHARED IMPORTED GLOBAL) + SET_PROPERTY(TARGET acl_op_compiler PROPERTY IMPORTED_LOCATION ${acl_op_compiler_lib}) + add_custom_target(extern_ascend_cl DEPENDS ascendcl acl_op_compiler) endif() diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 6bd188c483..718904c4a6 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -201,6 +201,9 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) if(WITH_ASCEND AND NOT WITH_ASCEND_CXX11) SET(PROTOBUF_REPOSITORY https://gitee.com/tianjianhe/protobuf.git) SET(PROTOBUF_TAG v3.8.0) +elseif(WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11) + SET(PROTOBUF_REPOSITORY https://gitee.com/tianjianhe/protobuf.git) + SET(PROTOBUF_TAG v3.8.0) else() SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) SET(PROTOBUF_TAG 9f75c5aa851cd877fb0d93ccc31b8567a6706546) diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index 327de067be..eee45b17c3 100644 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -276,7 +276,12 @@ endif(WITH_BOX_PS) if(WITH_ASCEND OR WITH_ASCEND_CL) include(external/ascend) - list(APPEND third_party_deps extern_ascend) + if(WITH_ASCEND) + list(APPEND third_party_deps extern_ascend) + endif() + if(WITH_ASCEND_CL) + list(APPEND third_party_deps extern_ascend_cl) + endif() endif () if (WITH_PSCORE) diff --git a/paddle/fluid/framework/garbage_collector.cc b/paddle/fluid/framework/garbage_collector.cc index e4142d89e5..a48589a82d 100644 --- a/paddle/fluid/framework/garbage_collector.cc +++ b/paddle/fluid/framework/garbage_collector.cc @@ -89,7 +89,8 @@ StreamGarbageCollector::StreamGarbageCollector(const platform::CUDAPlace &place, : GarbageCollector(place, max_memory_size) { platform::CUDADeviceGuard guard(place.device); PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamCreate(&stream_)); - callback_manager_.reset(new platform::StreamCallbackManager(stream_)); + callback_manager_.reset( + new platform::StreamCallbackManager(stream_)); } StreamGarbageCollector::~StreamGarbageCollector() { diff --git a/paddle/fluid/framework/garbage_collector.h b/paddle/fluid/framework/garbage_collector.h index 9148d2f252..eec8327c72 100644 --- a/paddle/fluid/framework/garbage_collector.h +++ b/paddle/fluid/framework/garbage_collector.h @@ -117,7 +117,8 @@ class StreamGarbageCollector : public GarbageCollector { private: cudaStream_t stream_; - std::unique_ptr callback_manager_; + std::unique_ptr> + callback_manager_; }; class CUDAPinnedGarbageCollector : public GarbageCollector { diff --git a/paddle/fluid/operators/elementwise/CMakeLists.txt b/paddle/fluid/operators/elementwise/CMakeLists.txt index 1309f1d457..216a3f79d6 100644 --- a/paddle/fluid/operators/elementwise/CMakeLists.txt +++ b/paddle/fluid/operators/elementwise/CMakeLists.txt @@ -8,4 +8,7 @@ register_operators(DEPS op_version_registry) cc_test(test_elementwise_add_op_inplace SRCS test_elementwise_add_op_inplace.cc DEPS op_registry elementwise_add_op scope device_context enforce executor) cc_test(test_elementwise_div_grad_grad SRCS test_elementwise_div_grad_grad.cc DEPS op_registry elementwise_div_op scope device_context enforce executor) cc_test(test_elementwise_add_grad_grad SRCS test_elementwise_add_grad_grad.cc DEPS op_registry elementwise_add_op scope device_context enforce executor) + +if(WITH_ASCEND_CL) cc_test(elementwise_op_npu_test SRCS elementwise_op_npu_test.cc DEPS op_registry elementwise_add_op elementwise_sub_op scope device_context enforce executor) +endif() diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 11c7ff546c..7e0675bdef 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -72,7 +72,7 @@ if(WITH_ASCEND) endif() if(WITH_ASCEND_CL) - cc_library(npu_info SRCS npu_info.cc DEPS gflags glog enforce monitor atlas_acl atlas_acl_op_compiler) + cc_library(npu_info SRCS npu_info.cc DEPS gflags glog enforce monitor ascendcl acl_op_compiler) endif() add_subdirectory(dynload) @@ -105,7 +105,7 @@ nv_library(stream_callback_manager SRCS stream_callback_manager.cc DEPS simple_t ENDIF() IF(WITH_ASCEND_CL) -cc_library(stream_callback_manager SRCS stream_callback_manager.cc DEPS simple_threadpool enforce atlas_acl) +cc_library(stream_callback_manager SRCS stream_callback_manager.cc DEPS simple_threadpool enforce) ENDIF() IF(WITH_GPU) diff --git a/paddle/fluid/platform/stream/cuda_stream.cc b/paddle/fluid/platform/stream/cuda_stream.cc index 4543f367ba..5c985f7373 100644 --- a/paddle/fluid/platform/stream/cuda_stream.cc +++ b/paddle/fluid/platform/stream/cuda_stream.cc @@ -35,7 +35,7 @@ bool CUDAStream::Init(const Place& place, const Priority& priority) { PADDLE_ENFORCE_CUDA_SUCCESS( cudaStreamCreateWithPriority(&stream_, kDefaultFlag, 0)); } - callback_manager_.reset(new StreamCallbackManager(stream_)); + callback_manager_.reset(new StreamCallbackManager(stream_)); VLOG(3) << "CUDAStream Init stream: " << stream_ << ", priority: " << static_cast(priority); return true; diff --git a/paddle/fluid/platform/stream/cuda_stream.h b/paddle/fluid/platform/stream/cuda_stream.h index c65d107cf4..94067c0aab 100644 --- a/paddle/fluid/platform/stream/cuda_stream.h +++ b/paddle/fluid/platform/stream/cuda_stream.h @@ -74,7 +74,7 @@ class CUDAStream final { Place place_; cudaStream_t stream_{nullptr}; Priority priority_{Priority::kNormal}; - std::unique_ptr callback_manager_; + std::unique_ptr> callback_manager_; DISABLE_COPY_AND_ASSIGN(CUDAStream); };