|
|
@ -19,6 +19,14 @@ import numpy as np
|
|
|
|
import paddle.fluid as fluid
|
|
|
|
import paddle.fluid as fluid
|
|
|
|
import paddle.fluid.core as core
|
|
|
|
import paddle.fluid.core as core
|
|
|
|
from paddle.fluid.tests.unittests.test_profiler import TestProfiler
|
|
|
|
from paddle.fluid.tests.unittests.test_profiler import TestProfiler
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# NCCL 2.7 chooses to use shared memory where NCCL 2.6 did not, which causes the following error:
|
|
|
|
|
|
|
|
# include/shm.h:28 NCCL WARN Call to posix_fallocate failed: No space left on device
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
# Set the environment variable NCCL_SHM_DISABLE=1 to disable the Shared Memory (SHM) transport
|
|
|
|
|
|
|
|
# and force the use of P2P, which is the default transport in NCCL 2.6.
|
|
|
|
|
|
|
|
# os.environ only accepts string values, so the flag is written as the literal '1'.
os.environ['NCCL_SHM_DISABLE'] = '1'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestPEProfiler(TestProfiler):
|
|
|
|
class TestPEProfiler(TestProfiler):
|
|
|
|