@ -114,7 +114,7 @@ struct NCCLContextMap {
// if num_trainers == 1, should create a new nccl id for local comms.
// if num_trainers == 1, should create a new nccl id for local comms.
if ( num_trainers = = 1 & & nccl_id = = nullptr ) {
if ( num_trainers = = 1 & & nccl_id = = nullptr ) {
std : : lock_guard < std : : mutex > guard ( NCCLGroupGuard : : NCCLMutex ( ) ) ;
std : : lock_guard < std : : mutex > guard ( NCCLGroupGuard : : NCCLMutex ( ) ) ;
PADDLE_ ENFO RC E_CUDA_SUCCESS( platform : : dynload : : ncclCommInitAll (
PADDLE_ RETRY _CUDA_SUCCESS( platform : : dynload : : ncclCommInitAll (
comms . get ( ) , static_cast < int > ( order_ . size ( ) ) , order_ . data ( ) ) ) ;
comms . get ( ) , static_cast < int > ( order_ . size ( ) ) , order_ . data ( ) ) ) ;
} else {
} else {
PADDLE_ENFORCE_NOT_NULL ( nccl_id , platform : : errors : : InvalidArgument (
PADDLE_ENFORCE_NOT_NULL ( nccl_id , platform : : errors : : InvalidArgument (
@ -132,8 +132,8 @@ struct NCCLContextMap {
}
}
VLOG ( 1 ) < < " init nccl rank: " < < rank < < " , nranks: " < < nranks
VLOG ( 1 ) < < " init nccl rank: " < < rank < < " , nranks: " < < nranks
< < " , gpu_id: " < < gpu_id < < " , dev_id: " < < order_ [ i ] ;
< < " , gpu_id: " < < gpu_id < < " , dev_id: " < < order_ [ i ] ;
PADDLE_ ENFO RC E_CUDA_SUCCESS( cudaSetDevice ( gpu_id ) ) ;
PADDLE_ RETRY _CUDA_SUCCESS( cudaSetDevice ( gpu_id ) ) ;
PADDLE_ ENFO RC E_CUDA_SUCCESS( platform : : dynload : : ncclCommInitRank (
PADDLE_ RETRY _CUDA_SUCCESS( platform : : dynload : : ncclCommInitRank (
comms . get ( ) + i , nranks , * nccl_id , rank ) ) ;
comms . get ( ) + i , nranks , * nccl_id , rank ) ) ;
}
}
}
}