@@ -22,8 +22,6 @@ from mindspore.common.api import _executor
 from mindspore.nn import TrainOneStepCell, WithLossCell
 from mindspore.nn.optim import Adam, AdamWeightDecay
 from mindspore.ops import operations as P
-import mindspore.nn.learning_rate_schedule as lr_schedules
-from mindspore.nn.dynamic_lr import polynomial_decay_lr
 
 context.set_context(enable_sparse=True)
 
@@ -137,7 +135,7 @@ def test_adam_group1():
     net_with_loss = WithLossCell(net, loss)
     all_params = net.trainable_params()
 
-    poly_decay_lr = polynomial_decay_lr(0.01, 0.0001, total_step=10, step_per_epoch=1, decay_epoch=3, power=1.0)
+    poly_decay_lr = nn.polynomial_decay_lr(0.01, 0.0001, total_step=10, step_per_epoch=1, decay_epoch=3, power=1.0)
 
     group_params = [{'params': [all_params[0]], 'lr': poly_decay_lr, 'weight_decay': 0.9},
                     {'params': [all_params[1]]}]
@@ -157,7 +155,7 @@ def test_adam_group2():
     net_with_loss = WithLossCell(net, loss)
     all_params = net.trainable_params()
 
-    schedule_lr = lr_schedules.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
+    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
     group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
                     {'params': [all_params[1]]}]
     optimizer = nn.Adam(group_params, learning_rate=schedule_lr)
@@ -175,7 +173,7 @@ def test_adamweightdecay_group():
     net_with_loss = WithLossCell(net, loss)
     all_params = net.trainable_params()
 
-    schedule_lr = lr_schedules.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
+    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
     group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
                     {'params': [all_params[1]]}]
     optimizer = nn.AdamWeightDecay(group_params, learning_rate=schedule_lr)
@@ -193,7 +191,7 @@ def test_adamoffload_group():
     net_with_loss = WithLossCell(net, loss)
     all_params = net.trainable_params()
 
-    schedule_lr = lr_schedules.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
+    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
     group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
                     {'params': [all_params[1]]}]
     optimizer = nn.AdamOffload(group_params, learning_rate=schedule_lr)
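Note: the removed `lr_schedules` / `dynamic_lr` imports are replaced by the same APIs re-exported from `mindspore.nn`, which is what each hunk above migrates to. A minimal sketch of the migrated usage, assuming the `mindspore.nn` exports shown in the diff; `nn.Dense` stands in here for the tests' `Net` cell:

    import mindspore.nn as nn

    net = nn.Dense(64, 10)  # placeholder for the tests' Net cell

    # Functional dynamic LR: a precomputed per-step list of learning rates.
    poly_decay_lr = nn.polynomial_decay_lr(0.01, 0.0001, total_step=10,
                                           step_per_epoch=1, decay_epoch=3, power=1.0)

    # LearningRateSchedule cell: computes the LR from the current global step.
    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)

    # Either form is accepted as learning_rate, globally or per parameter group.
    optimizer = nn.Adam(net.trainable_params(), learning_rate=schedule_lr)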