@@ -49,13 +49,15 @@ class TestDistMnistNCCL2DGC(TestDistBase):
                 log_name=flag_name)
 
     def tearDown(self):
-        result = count_of_sparse_all_reduce_calls(
-            'test_dist_mnist_dgc_nccl_tr0_err.log')
-        # only 1 layer use dgc now, run_step=5, rampup_begin_step=2, so 1 * (5 - 2) = 3
+        import paddle.fluid as fluid
+        if fluid.core.is_compiled_with_cuda():
+            result = count_of_sparse_all_reduce_calls(
+                'test_dist_mnist_dgc_nccl_tr0_err.log')
+            # only 1 layer uses dgc now, run_step=5, rampup_begin_step=2, so 1 * (5 - 2) = 3
 
-        # temp close this test. In python3 CI, the log is right, but the result
-        # has a problem, may be in multi process mode, log is not writed in time.
-        # self.assertEqual(result, 3)
+            # temp close this test. In python3 CI, the log is right, but the result
+            # has a problem; maybe in multi-process mode the log is not written in time.
+            # self.assertEqual(result, 3)
 
 
 class TestDistMnistNCCL2DGCMultiCards(TestDistBase):
@@ -76,10 +78,12 @@ class TestDistMnistNCCL2DGCMultiCards(TestDistBase):
                 log_name=flag_name)
 
     def tearDown(self):
-        result = count_of_sparse_all_reduce_calls(
-            'test_dist_mnist_dgc_nccl_dgc_2cards_local.log')
-        # same as above, but use two cards
-        self.assertEqual(result, 6)
+        import paddle.fluid as fluid
+        if fluid.core.is_compiled_with_cuda():
+            result = count_of_sparse_all_reduce_calls(
+                'test_dist_mnist_dgc_nccl_dgc_2cards_local.log')
+            # same as above, but use two cards
+            self.assertEqual(result, 6)
 
 
 if __name__ == "__main__":
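
Both hunks apply the same pattern: the tearDown log assertion only makes sense on CUDA builds, because the NCCL/DGC trainers never run (and never write their logs) when Paddle is compiled without CUDA, yet unittest still invokes tearDown. Below is a minimal self-contained sketch of that pattern; count_of_sparse_all_reduce_calls is a hypothetical stand-in for the log-scanning helper the real test file defines, and the expected counts come from the comments in the diff (1 DGC layer * (run_step 5 - rampup_begin_step 2) = 3 calls per card, so 6 across two cards).

import re

import paddle.fluid as fluid


def count_of_sparse_all_reduce_calls(log_path):
    # Hypothetical stand-in for the helper in the real test file: count how
    # often the sparse all-reduce op shows up in a trainer's error log.
    with open(log_path) as f:
        return len(re.findall(r'sparse_all_reduce', f.read()))


def check_dgc_log(log_path, expected_calls):
    # Guard mirrors the diff: on CPU-only builds the DGC trainer never runs,
    # so there is no log to inspect and the check must be skipped.
    if fluid.core.is_compiled_with_cuda():
        assert count_of_sparse_all_reduce_calls(log_path) == expected_calls

Guarding inside tearDown, rather than skipping the whole test, matters because the test body already returns early on non-CUDA builds while tearDown still runs; without the guard it would fail trying to open a log that was never written.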