|
|
|
@ -244,7 +244,8 @@ class Lamb(Optimizer):
|
|
|
|
|
>>> optim = nn.Lamb(params=net.trainable_params(), learning_rate=0.1)
|
|
|
|
|
>>>
|
|
|
|
|
>>> #2) Use parameter groups and set different values
|
|
|
|
|
>>> poly_decay_lr = learning_rate_schedule.PolynomialDecayLR()
|
|
|
|
|
>>> poly_decay_lr = learning_rate_schedule.PolynomialDecayLR(learning_rate=0.1, end_learning_rate=0.01,
|
|
|
|
|
... decay_steps=4, power = 0.5)
|
|
|
|
|
>>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
|
|
|
|
|
>>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
|
|
|
|
|
>>> group_params = [{'params': conv_params, 'weight_decay': 0.01},
|
|
|
|
|