Merge pull request #11690 from typhoonzero/fix_trainer_nccl2_env

fix trainer nccl2 env
ce-debug
Wu Yi 7 years ago committed by GitHub
commit 19e877ffdb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -315,7 +315,7 @@ class Trainer(object):
for ip in worker_ips.split(","):
worker_endpoints.append(':'.join([ip, port]))
self.num_trainers = len(worker_endpoints)
-current_endpoint = os.getenv("POD_IP") + ":" + port
+current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port
worker_endpoints.remove(current_endpoint)
# TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id
# in ParallelExecutor to start

Loading…
Cancel
Save