|
|
|
@ -12,8 +12,7 @@ import (
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
const (
|
|
|
|
|
// DefaultEtcdTimeout is the default etcd timeout (5 seconds).
|
|
|
|
|
DefaultEtcdTimeout time.Duration = 5 * time.Second
|
|
|
|
|
// defaultEtcdTimeout is the package-private 5-second default timeout.
// Within this file it is used as the etcd client's DialTimeout, as the
// pause between failed connection attempts, and as the initial value of
// EtcdClient.timeout.
defaultEtcdTimeout time.Duration = 5 * time.Second
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// EtcdClient is used by the pserver client, which is part of the trainer process.
|
|
|
|
@ -48,7 +47,7 @@ func (p *EtcdClient) Desired() int {
|
|
|
|
|
|
|
|
|
|
psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value))
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Errorf("psDesired %s invalid %v", psDesired, err)
|
|
|
|
|
log.Errorf("psDesired %d invalid %v", psDesired, err)
|
|
|
|
|
time.Sleep(p.timeout)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
@ -67,12 +66,12 @@ func (p *EtcdClient) List() []Server {
|
|
|
|
|
for {
|
|
|
|
|
for i := 0; i < psDesired; i++ {
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), p.timeout)
|
|
|
|
|
cancel()
|
|
|
|
|
psKey := pserver.PsPath + strconv.Itoa(i)
|
|
|
|
|
log.Debugf("checking %s", psKey)
|
|
|
|
|
resp, err := p.client.Get(ctx, psKey)
|
|
|
|
|
cancel()
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Infof("Get psKey=%s error, %v", psKey, err)
|
|
|
|
|
log.Infof("Get psKey= %s error, %v", psKey, err)
|
|
|
|
|
time.Sleep(p.timeout)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
@ -107,11 +106,11 @@ func NewEtcd(endpoints string) *EtcdClient {
|
|
|
|
|
for {
|
|
|
|
|
cli, err = clientv3.New(clientv3.Config{
|
|
|
|
|
Endpoints: ep,
|
|
|
|
|
DialTimeout: DefaultEtcdTimeout,
|
|
|
|
|
DialTimeout: defaultEtcdTimeout,
|
|
|
|
|
})
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Errorf("Init etcd connection failed: %v", err)
|
|
|
|
|
time.Sleep(DefaultEtcdTimeout)
|
|
|
|
|
time.Sleep(defaultEtcdTimeout)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
@ -119,7 +118,7 @@ func NewEtcd(endpoints string) *EtcdClient {
|
|
|
|
|
log.Infof("Connected to etcd: %s\n", endpoints)
|
|
|
|
|
client := &EtcdClient{
|
|
|
|
|
client: cli,
|
|
|
|
|
timeout: DefaultEtcdTimeout,
|
|
|
|
|
timeout: defaultEtcdTimeout,
|
|
|
|
|
endpoints: ep,
|
|
|
|
|
}
|
|
|
|
|
return client
|
|
|
|
|