diff --git a/mindspore/nn/dynamic_lr.py b/mindspore/nn/dynamic_lr.py
index 7d31eee566..81f46029e6 100644
--- a/mindspore/nn/dynamic_lr.py
+++ b/mindspore/nn/dynamic_lr.py
@@ -262,15 +262,20 @@ def polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_e
         (1 - tmp\_epoch / tmp\_decay\_epoch)^{power} + end\_learning\_rate
 
     Where:
+
     .. math::
-        `tmp\_epoch = min(current\_epoch, decay\_epoch),
-        current\_epoch=floor(\frac{i}{step\_per\_epoch})`,
+        tmp\_epoch = min(current\_epoch, decay\_epoch)
 
+    .. math::
-        `tmp\_decay\_epoch = decay\_epoch`.
+        current\_epoch=floor(\frac{i}{step\_per\_epoch})
+
+    .. math::
+        tmp\_decay\_epoch = decay\_epoch
 
     If `update_decay_epoch` is true, update the value of `tmp_decay_epoch` every epoch. The formula is:
+
     .. math::
-        `tmp\_decay\_epoch = decay\_epoch * ceil(current\_epoch / decay\_epoch)`
+        tmp\_decay\_epoch = decay\_epoch * ceil(current\_epoch / decay\_epoch)
 
     Args:
         learning_rate (float): The initial value of learning rate.
diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py
index 1dfc91743c..70464f7f28 100644
--- a/mindspore/nn/wrap/loss_scale.py
+++ b/mindspore/nn/wrap/loss_scale.py
@@ -194,7 +194,7 @@ class TrainOneStepWithLossScaleCell(Cell):
         >>> net_with_loss = Net()
         >>> optimizer = nn.Momentum(net_with_loss.trainable_params(), learning_rate=0.1, momentum=0.9)
         >>> manager = nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000)
-        >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_update_cell=manager)
+        >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager)
         >>> train_network.set_train()
         >>>
         >>> inputs = Tensor(np.ones([16, 16]).astype(np.float32))
diff --git a/tests/st/pynative/loss_scale/test_loss_scale.py b/tests/st/pynative/loss_scale/test_loss_scale.py
index b34fa257b6..c80bc4367f 100644
--- a/tests/st/pynative/loss_scale/test_loss_scale.py
+++ b/tests/st/pynative/loss_scale/test_loss_scale.py
@@ -164,6 +164,31 @@ def test_loss_scale_fp16_lr_overflow():
     assert output_1[0].asnumpy() == output_2[0].asnumpy()
     assert output_1[1].asnumpy() == output_2[1].asnumpy() == True
 
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_loss_scale_fp16_lr_overflow_set_sense_scale():
+    inputs = Tensor(np.ones([16, 16]).astype(np.float32))
+    label = Tensor(np.zeros([16, 16]).astype(np.float32))
+    lr = Tensor(np.ones([1], np.float32) * 0.1)
+    net = NetFP16(16, 16)
+    net.set_train()
+
+    loss = MSELoss()
+    optimizer = Momentum(net.trainable_params(), learning_rate=lr, momentum=0.9)
+
+    net_with_loss = WithLossCell(net, loss)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
+                                                  scale_sense=Tensor(np.full((1), np.finfo(np.float32).max),
+                                                                     dtype=mstype.float32))
+    output_1 = train_network(inputs, label)
+
+    train_network.set_sense_scale(Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32))
+    output_2 = train_network(inputs, label)
+    assert output_1[0].asnumpy() == output_2[0].asnumpy()
+    assert output_1[1].asnumpy() == output_2[1].asnumpy() == True
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
 @pytest.mark.platform_x86_ascend_training
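
Note: as a sanity check of the corrected docstring math, below is a minimal pure-Python sketch that restates the formulas literally. The function name polynomial_decay_lr_sketch is hypothetical, this is not the MindSpore implementation, and the max(..., 1) guard is an assumption added so epoch 0 does not divide by zero when update_decay_epoch is true.

import math

def polynomial_decay_lr_sketch(learning_rate, end_learning_rate, total_step, step_per_epoch,
                               decay_epoch, power, update_decay_epoch=False):
    """Restate the docstring formulas step by step (illustration only)."""
    lr = []
    delta = learning_rate - end_learning_rate
    for i in range(total_step):
        current_epoch = math.floor(i / step_per_epoch)
        tmp_epoch = min(current_epoch, decay_epoch)
        tmp_decay_epoch = decay_epoch
        if update_decay_epoch:
            # tmp_decay_epoch = decay_epoch * ceil(current_epoch / decay_epoch);
            # the max(..., 1) guard is an assumption to avoid division by zero at epoch 0.
            tmp_decay_epoch = decay_epoch * max(math.ceil(current_epoch / decay_epoch), 1)
        lr.append(delta * (1 - tmp_epoch / tmp_decay_epoch) ** power + end_learning_rate)
    return lr

# Example: polynomial_decay_lr_sketch(0.1, 0.01, total_step=6, step_per_epoch=2, decay_epoch=2, power=0.5)
# -> [0.1, 0.1, 0.0736..., 0.0736..., 0.01, 0.01]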