|
|
|
@ -22,7 +22,6 @@ import math
|
|
|
|
|
import collections
|
|
|
|
|
import numpy as np
|
|
|
|
|
import mindspore.nn as nn
|
|
|
|
|
from mindspore import context
|
|
|
|
|
from mindspore import log as logger
|
|
|
|
|
from mindspore.ops import operations as P
|
|
|
|
|
from mindspore.common.tensor import Tensor
|
|
|
|
@ -107,10 +106,11 @@ class LossCallBack(Callback):
|
|
|
|
|
percent = 1
|
|
|
|
|
epoch_num -= 1
|
|
|
|
|
print("epoch: {}, current epoch percent: {}, step: {}, outputs are {}"
|
|
|
|
|
.format(int(epoch_num), "%.3f" % percent, cb_params.cur_step_num, str(cb_params.net_outputs)))
|
|
|
|
|
.format(int(epoch_num), "%.3f" % percent, cb_params.cur_step_num, str(cb_params.net_outputs)),
|
|
|
|
|
flush=True)
|
|
|
|
|
else:
|
|
|
|
|
print("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num,
|
|
|
|
|
str(cb_params.net_outputs)))
|
|
|
|
|
str(cb_params.net_outputs)), flush=True)
|
|
|
|
|
|
|
|
|
|
def LoadNewestCkpt(load_finetune_checkpoint_dir, steps_per_epoch, epoch_num, prefix):
|
|
|
|
|
"""
|
|
|
|
@ -220,22 +220,13 @@ def _get_poly_lr(global_step, lr_init, lr_end, lr_max, warmup_steps, total_steps
|
|
|
|
|
return learning_rate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_bert_thor_lr(lr_max=0.0034, lr_min=3.244e-05, lr_power=1.0, lr_total_steps=30000):
    """Build the THOR learning-rate schedule for BERT.

    The schedule is a polynomial decay produced by ``_get_poly_lr`` with no
    warmup phase (``warmup_steps=0``) and then wrapped in a MindSpore Tensor.
    The defaults reproduce the previously hard-coded configuration, so calling
    this function with no arguments is backward compatible.

    Args:
        lr_max (float): Peak value of the polynomial-decay schedule.
        lr_min (float): Final value the schedule decays toward.
        lr_power (float): Power of the polynomial decay curve.
        lr_total_steps (int): Total number of steps covered by the schedule.

    Returns:
        Tensor: The per-step learning-rate values.
    """
    # NOTE(review): an earlier variant branched on
    # context.get_context("device_target") with slightly different constants;
    # the parameterized form below supersedes it — pass explicit values to
    # reproduce a device-specific schedule.
    learning_rate = _get_poly_lr(global_step=0, lr_init=0.0, lr_end=lr_min, lr_max=lr_max, warmup_steps=0,
                                 total_steps=lr_total_steps, poly_power=lr_power)
    return Tensor(learning_rate)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_bert_thor_damping(damping_max=5e-2, damping_min=1e-6, damping_power=1.0, damping_total_steps=30000):
    """Build the THOR damping schedule for BERT.

    The damping values follow the same polynomial-decay shape as the learning
    rate (computed by ``_get_poly_lr`` with no warmup) and are returned as a
    MindSpore Tensor. The defaults reproduce the previously hard-coded
    configuration, so calling this function with no arguments is backward
    compatible.

    Args:
        damping_max (float): Peak value of the damping schedule.
        damping_min (float): Final value the schedule decays toward.
        damping_power (float): Power of the polynomial decay curve.
        damping_total_steps (int): Total number of steps covered by the schedule.

    Returns:
        Tensor: The per-step damping values.
    """
    # NOTE(review): an earlier variant branched on
    # context.get_context("device_target") with slightly different constants;
    # the parameterized form below supersedes it — pass explicit values to
    # reproduce a device-specific schedule.
    damping = _get_poly_lr(global_step=0, lr_init=0.0, lr_end=damping_min, lr_max=damping_max, warmup_steps=0,
                           total_steps=damping_total_steps, poly_power=damping_power)
    return Tensor(damping)
|
|
|
|
|