Add more print statements to diagnose the timeout issue and increase test timeout values (#18219)

* Add more print statements to diagnose the timeout issue and increase test timeout values
revert-18229-add_multi_gpu_install_check
guru4elephant 6 years ago committed by GitHub
parent cf15c3ff1e
commit 7d76e34ec2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -184,10 +184,10 @@ if(WITH_DISTRIBUTE)
py_test_modules(test_dgc_op MODULES test_dgc_op)
endif()
if(NOT APPLE)
set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 200)
set_tests_properties(test_dist_mnist_nccl PROPERTIES TIMEOUT 250)
set_tests_properties(test_dist_mnist_lars PROPERTIES TIMEOUT 200)
set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 200)
set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 300)
set_tests_properties(test_dist_mnist_nccl PROPERTIES TIMEOUT 300)
set_tests_properties(test_dist_mnist_lars PROPERTIES TIMEOUT 300)
set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 300)
py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext)
py_test_modules(test_dist_se_resnext_nccl MODULES test_dist_se_resnext_nccl)
bash_test_modules(test_launch MODULES test_launch.sh)

@ -144,7 +144,11 @@ class TestDistRunnerBase(object):
"get trainer program done. with nccl2 mode")
trainer_prog = fluid.default_main_program()
else:
my_print(
type(self).__name__,
"do nothing about main program, just use it")
trainer_prog = fluid.default_main_program()
my_print(type(self).__name__, "use main program done.")
if args.use_cuda:
device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
@ -224,11 +228,14 @@ class TestDistRunnerBase(object):
my_print(type(self).__name__, "begin to train on trainer")
out_losses = []
for _ in six.moves.xrange(RUN_STEP):
for i in six.moves.xrange(RUN_STEP):
loss, = exe.run(binary,
fetch_list=[avg_cost.name],
feed=feeder.feed(get_data()))
out_losses.append(loss[0])
my_print(type(self).__name__, "run step %d finished" % i)
my_print(type(self).__name__, "trainer run finished")
if six.PY2:
print(pickle.dumps(out_losses))
else:

Loading…
Cancel
Save