|
|
|
@ -77,7 +77,7 @@ def distribute_pretrain():
|
|
|
|
|
|
|
|
|
|
print("hccl_config_dir:", args.hccl_config_dir)
|
|
|
|
|
print("hccl_time_out:", args.hccl_time_out)
|
|
|
|
|
cmd = append_cmd_env(cmd, 'HCCL_CONNECTION_TIMEOUT', args.hccl_time_out)
|
|
|
|
|
cmd = append_cmd_env(cmd, 'HCCL_CONNECT_TIMEOUT', args.hccl_time_out)
|
|
|
|
|
cmd = append_cmd_env(cmd, 'RANK_TABLE_FILE', args.hccl_config_dir)
|
|
|
|
|
|
|
|
|
|
cores = multiprocessing.cpu_count()
|
|
|
|
|