NCCL AllReduce

helinwang-patch-1
Yu Yang 7 years ago
parent 3f88fad08c
commit c0c2e15920

@ -50,10 +50,6 @@ void NCCLAllReduceOpHandle::RunImpl() {
auto &lod_tensor = s->FindVar(var_name)->Get<LoDTensor>(); auto &lod_tensor = s->FindVar(var_name)->Get<LoDTensor>();
void *buffer = const_cast<void *>(lod_tensor.data<void>()); void *buffer = const_cast<void *>(lod_tensor.data<void>());
uintptr_t buf = reinterpret_cast<uintptr_t>(buffer);
if (buf % sizeof(float) != 0) {
VLOG(3) << "Buffer is not aligned " << buf;
}
if (dtype == -1) { if (dtype == -1) {
dtype = platform::ToNCCLDataType(lod_tensor.type()); dtype = platform::ToNCCLDataType(lod_tensor.type());

@ -36,12 +36,10 @@ inline ncclDataType_t ToNCCLDataType(std::type_index type) {
class NCCLGroupGuard { class NCCLGroupGuard {
public: public:
inline NCCLGroupGuard() { inline NCCLGroupGuard() { PADDLE_ENFORCE(dynload::ncclGroupStart()); }
mutex().lock();
PADDLE_ENFORCE(dynload::ncclGroupStart());
}
inline ~NCCLGroupGuard() { inline ~NCCLGroupGuard() {
mutex().lock();
PADDLE_ENFORCE(dynload::ncclGroupEnd()); PADDLE_ENFORCE(dynload::ncclGroupEnd());
mutex().unlock(); mutex().unlock();
} }

Loading…
Cancel
Save