|
|
|
@ -21,7 +21,7 @@ func main() {
|
|
|
|
|
etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls")
|
|
|
|
|
numPservers := flag.Int("num-pservers", 1, "total pserver count in a training job")
|
|
|
|
|
checkpointPath := flag.String("checkpoint-path", "/checkpoints/", "save checkpoint path")
|
|
|
|
|
checkpointInterval := flag.Int("checkpoint-interval", "10", "save checkpoint per interval seconds")
|
|
|
|
|
checkpointInterval := flag.Int("checkpoint-interval", 10, "save checkpoint per interval seconds")
|
|
|
|
|
logLevel := flag.String("log-level", "info",
|
|
|
|
|
"log level, possible values: debug, info, warning, error, fatal, panic")
|
|
|
|
|
flag.Parse()
|
|
|
|
@ -34,11 +34,12 @@ func main() {
|
|
|
|
|
|
|
|
|
|
var idx int
|
|
|
|
|
var cp pserver.Checkpoint
|
|
|
|
|
var e *pserver.EtcdClient
|
|
|
|
|
if *index >= 0 {
|
|
|
|
|
idx = *index
|
|
|
|
|
} else {
|
|
|
|
|
timeout := time.Second * time.Duration((*etcdTimeout))
|
|
|
|
|
e := pserver.NewEtcdClient(*etcdEndpoint, *numPservers, timeout)
|
|
|
|
|
e = pserver.NewEtcdClient(*etcdEndpoint, *numPservers, timeout)
|
|
|
|
|
idx, err = e.Register()
|
|
|
|
|
if err != nil {
|
|
|
|
|
panic(err)
|
|
|
|
|