|
|
|
@ -954,7 +954,7 @@ class ParameterServerLauncher(object):
|
|
|
|
|
"TRAINING_ROLE": "PSERVER",
|
|
|
|
|
"PADDLE_TRAINERS_NUM": str(self.worker_num),
|
|
|
|
|
"POD_IP": cur_server.endpoint.split(":")[0],
|
|
|
|
|
"PADDLE_WITH_GLOO": str(os.getenv("PADDLE_WITH_GLOO", "1")),
|
|
|
|
|
"PADDLE_WITH_GLOO": str(os.getenv("PADDLE_WITH_GLOO", "0")),
|
|
|
|
|
"PADDLE_GLOO_RENDEZVOUS": "3",
|
|
|
|
|
"PADDLE_GLOO_FS_PATH": self.gloo_rendezvous_dir,
|
|
|
|
|
"PADDLE_GLOO_HTTP_ENDPOINT": self.http_port
|
|
|
|
@ -1018,7 +1018,7 @@ class ParameterServerLauncher(object):
|
|
|
|
|
self.heter_worker_endpoints,
|
|
|
|
|
"TRAINING_ROLE": "TRAINER",
|
|
|
|
|
"PADDLE_TRAINER_ID": str(cur_worker.rank),
|
|
|
|
|
"PADDLE_WITH_GLOO": str(os.getenv("PADDLE_WITH_GLOO", "1")),
|
|
|
|
|
"PADDLE_WITH_GLOO": str(os.getenv("PADDLE_WITH_GLOO", "0")),
|
|
|
|
|
"PADDLE_GLOO_RENDEZVOUS": "3",
|
|
|
|
|
"PADDLE_GLOO_FS_PATH": self.gloo_rendezvous_dir,
|
|
|
|
|
"FLAGS_selected_gpus": "0",
|
|
|
|
@ -1088,7 +1088,7 @@ class ParameterServerLauncher(object):
|
|
|
|
|
"TRAINING_ROLE": "HETER_TRAINER",
|
|
|
|
|
"PADDLE_TRAINERS_NUM": str(self.worker_num),
|
|
|
|
|
"POD_IP": cur_heter_worker.endpoint.split(":")[0],
|
|
|
|
|
"PADDLE_WITH_GLOO": str(os.getenv("PADDLE_WITH_GLOO", "1")),
|
|
|
|
|
"PADDLE_WITH_GLOO": str(os.getenv("PADDLE_WITH_GLOO", "0")),
|
|
|
|
|
"PADDLE_GLOO_RENDEZVOUS": "3",
|
|
|
|
|
"PADDLE_GLOO_FS_PATH": self.gloo_rendezvous_dir,
|
|
|
|
|
"FLAGS_selected_gpus": "0",
|
|
|
|
|