Add detailed logs to the resnet unit test (#20558)

Add detailed logs to the resnet unit test
revert-20712-fix_depthwise_conv
gongweibao 6 years ago committed by GitHub
parent 36c85ef492
commit bf6470c71e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -15,7 +15,7 @@ if [[ ${TEST_TIMEOUT}"x" == "x" ]]; then
fi
# rm flag file
rm -f ${name}*.log
rm -f ${name}_*.log
# start the unit test
run_time=$(( $TEST_TIMEOUT - 10 ))
@ -28,9 +28,15 @@ fi
echo "${name} faild with ${exit_code}"
netstat -an
# paddle log
echo "${name} log"
cat -n ${name}*.log
for log in `ls ${name}_*.log`
do
printf "\ncat ${log}\n"
cat -n ${log}
done
#display system context
for i in {1..2}; do

@ -525,7 +525,11 @@ class TestDistBase(unittest.TestCase):
self._port_set.add(port)
return port
def start_pserver(self, model_file, check_error_log, required_envs):
def start_pserver(self,
model_file,
check_error_log,
required_envs,
log_name=""):
ps0_ep, ps1_ep = self._ps_endpoints.split(",")
ps_cmd = "%s"
@ -548,8 +552,8 @@ class TestDistBase(unittest.TestCase):
print(ps0_cmd)
print(ps1_cmd)
ps0_pipe = open("/tmp/ps0_err.log", "wb")
ps1_pipe = open("/tmp/ps1_err.log", "wb")
ps0_pipe = open(log_name + "_ps0_err.log", "wb")
ps1_pipe = open(log_name + "_ps1_err.log", "wb")
print_to_err(type(self).__name__, "going to start pserver process 0")
ps0_proc = subprocess.Popen(
@ -628,8 +632,8 @@ class TestDistBase(unittest.TestCase):
def _run_cluster(self, model, envs, check_error_log, log_name):
# Run dist train to compare with local results
ps0, ps1, ps0_pipe, ps1_pipe = self.start_pserver(model,
check_error_log, envs)
ps0, ps1, ps0_pipe, ps1_pipe = self.start_pserver(
model, check_error_log, envs, log_name=log_name)
ps0_ep, ps1_ep = self._ps_endpoints.split(",")
@ -848,7 +852,7 @@ class TestDistBase(unittest.TestCase):
if check_error_log:
required_envs["GLOG_vmodule"] = \
"fused_all_reduce_op_handle=10,all_reduce_op_handle=10,alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10,alloc_continuous_space_for_grad_pass=10,fast_threaded_ssa_graph_executor=10"
"fused_all_reduce_op_handle=10,all_reduce_op_handle=10,alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10,alloc_continuous_space_for_grad_pass=10,fast_threaded_ssa_graph_executor=10,executor=10,operator=10"
required_envs["GLOG_logtostderr"] = "1"
local_losses \

@ -15,7 +15,9 @@
from __future__ import print_function
import unittest
from test_dist_base import TestDistBase
import os
flag_name = os.path.splitext(__file__)[0]
def skip_ci(func):
@ -36,7 +38,11 @@ class TestDistSeResneXt2x2Async(TestDistBase):
@skip_ci
def test_dist_train(self):
self.check_with_place("dist_se_resnext.py", delta=100)
self.check_with_place(
"dist_se_resnext.py",
delta=100,
check_error_log=True,
log_name=flag_name)
if __name__ == "__main__":

Loading…
Cancel
Save