fix distributed launch.py (#17571)

test=develop
fix_ema
chengduo 7 years ago committed by GitHub
parent 6e11f97708
commit ca03f4989a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -75,11 +75,11 @@ def start_procs(gpus, entrypoint, entrypoint_args, log_dir):
nranks = num_nodes * gpus
# ======== for dist training =======
gpu_ids = get_gpu_ids(gpus)
-for i in gpu_ids:
+for i in range(gpus):
curr_env = {}
curr_env.update(default_envs)
curr_env.update({
-"FLAGS_selected_gpus": "%d" % i,
+"FLAGS_selected_gpus": "%d" % gpu_ids[i],
"PADDLE_TRAINER_ID": "%d" % (node_trainer_id * gpus + i),
"PADDLE_CURRENT_ENDPOINT": "%s:617%d" % (current_ip, i),
# nranks

Loading…
Cancel
Save