Enable P2P memory copy

On a K40 machine with 4 devices, the time drops from ~4.0 to ~3.8+; the
improvement should be more pronounced on better hardware.
shanyi15-patch-2
Xin Pan 7 years ago
parent 1ca1e1c384
commit 18ac6947d0

@ -26,6 +26,7 @@ namespace paddle {
namespace framework {
std::once_flag gflags_init_flag;
std::once_flag p2p_init_flag;
void InitGflags(std::vector<std::string> &argv) {
std::call_once(gflags_init_flag, [&]() {
@ -42,6 +43,25 @@ void InitGflags(std::vector<std::string> &argv) {
});
}
void InitP2P(int count) {
std::call_once(p2p_init_flag, [&]() {
for (int i = 0; i < count; ++i) {
for (int j = 0; j < count; ++j) {
if (i == j) continue;
int can_acess = -1;
PADDLE_ENFORCE(cudaDeviceCanAccessPeer(&can_acess, i, j),
"Failed to test P2P access.");
if (can_acess != 1) {
LOG(WARNING) << "Cannot enable P2P access from " << i << " to " << j;
} else {
cudaSetDevice(i);
cudaDeviceEnablePeerAccess(j, 0);
}
}
}
});
}
void InitDevices() {
/* Init all available devices by default */
@ -63,7 +83,7 @@ void InitDevices() {
for (int i = 0; i < count; ++i) {
places.emplace_back(platform::CUDAPlace(i));
}
InitP2P(count);
platform::DeviceContextPool::Init(places);
}

Loading…
Cancel
Save