# ============================================================================
"""learning rate generator"""
import math
from collections import Counter

import numpy as np


def get_lr_cifar10(current_step, lr_max, total_epochs, steps_per_epoch):
    ...  # body elided in this excerpt
    return learning_rate


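# Usage sketch (illustrative, not part of the original file; the hyperparameter
# values are placeholders): get_lr_cifar10 is expected to build a per-step
# learning-rate array beginning at `current_step`, which callers would typically
# convert to a framework tensor before handing it to an optimizer.
#
#     lr_array = get_lr_cifar10(current_step=0, lr_max=0.1,
#                               total_epochs=125, steps_per_epoch=390)
#     # lr_array[i] is the rate used at global step current_step + i

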
def get_lr_imagenet(cfg, steps_per_epoch):
    """generate learning rate array"""
    if cfg.lr_scheduler == 'exponential':
        lr = warmup_step_lr(cfg.learning_rate,
                            cfg.lr_epochs,
                            steps_per_epoch,
                            cfg.warmup_epochs,
                            cfg.epoch_size,
                            gamma=cfg.lr_gamma,
                            )
    elif cfg.lr_scheduler == 'cosine_annealing':
        lr = warmup_cosine_annealing_lr(cfg.learning_rate,
                                        steps_per_epoch,
                                        cfg.warmup_epochs,
                                        cfg.epoch_size,
                                        cfg.T_max,
                                        cfg.eta_min)
    else:
        raise NotImplementedError(cfg.lr_scheduler)

    return lr


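# Usage sketch (illustrative, not part of the original file): get_lr_imagenet
# only reads attributes from `cfg`, so any object exposing these fields works;
# the values below are made-up placeholders built with types.SimpleNamespace.
#
#     from types import SimpleNamespace
#     cfg = SimpleNamespace(lr_scheduler='cosine_annealing', learning_rate=0.1,
#                           warmup_epochs=5, epoch_size=150, T_max=150,
#                           eta_min=0.0, lr_epochs=[30, 60, 90], lr_gamma=0.1)
#     lr_array = get_lr_imagenet(cfg, steps_per_epoch=625)

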
def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
    """Linearly ramp the learning rate from init_lr to base_lr over warmup_steps"""
    lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
    lr = float(init_lr) + lr_inc * current_step
    return lr


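# Worked example (illustrative, not part of the original file): with base_lr=0.1,
# init_lr=0 and warmup_steps=500, the per-step increment is (0.1 - 0) / 500 = 2e-4,
# so linear_warmup_lr(1, 500, 0.1, 0) gives 2e-4 and linear_warmup_lr(500, 500, 0.1, 0)
# gives 0.1 (up to float rounding): the rate ramps linearly from init_lr to base_lr.

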
def warmup_step_lr(lr, lr_epochs, steps_per_epoch, warmup_epochs, max_epoch, gamma=0.1):
    """Linear warmup followed by step decay at the milestone epochs in lr_epochs"""
    base_lr = lr
    warmup_init_lr = 0
    total_steps = int(max_epoch * steps_per_epoch)
    warmup_steps = int(warmup_epochs * steps_per_epoch)
    milestones = lr_epochs
    milestones_steps = []
    for milestone in milestones:
        milestones_step = milestone * steps_per_epoch
        milestones_steps.append(milestones_step)

    lr_each_step = []
    lr = base_lr
    milestones_steps_counter = Counter(milestones_steps)
    for i in range(total_steps):
        if i < warmup_steps:
            lr = linear_warmup_lr(i + 1, warmup_steps, base_lr, warmup_init_lr)
        else:
            # each time a milestone step is reached, multiply the running rate by gamma
            lr = lr * gamma**milestones_steps_counter[i]
        lr_each_step.append(lr)

    return np.array(lr_each_step).astype(np.float32)


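# Worked example (illustrative, not part of the original file): with
# warmup_step_lr(0.1, [30, 60], 100, 1, 90) the array has 90 * 100 = 9000 entries,
# ramps from 0.001 to 0.1 over the first 100 warmup steps, then is multiplied by
# gamma=0.1 at the milestone steps 30 * 100 = 3000 and 60 * 100 = 6000, i.e. it
# drops to 0.01 and then to 0.001.

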
def multi_step_lr(lr, milestones, steps_per_epoch, max_epoch, gamma=0.1):
    """Step decay at the given milestone epochs, with no warmup"""
    return warmup_step_lr(lr, milestones, steps_per_epoch, 0, max_epoch, gamma=gamma)


def step_lr(lr, epoch_size, steps_per_epoch, max_epoch, gamma=0.1):
    """Step decay by gamma every epoch_size epochs"""
    lr_epochs = []
    for i in range(1, max_epoch):
        if i % epoch_size == 0:
            lr_epochs.append(i)
    return multi_step_lr(lr, lr_epochs, steps_per_epoch, max_epoch, gamma=gamma)


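# Usage sketch (illustrative, not part of the original file): step_lr derives the
# milestone list from a fixed period, so the two calls below produce the same
# schedule, decaying the rate by gamma=0.1 at epochs 30 and 60.
#
#     lr_a = step_lr(0.1, 30, 100, 90)
#     lr_b = multi_step_lr(0.1, [30, 60], 100, 90)

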
def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0):
    """Cosine annealing learning rate"""
    base_lr = lr
    warmup_init_lr = 0