@@ -24,10 +24,10 @@ from .fp16_lists import AutoMixedPrecisionLists
 __all__ = ["decorate"]
 
 
-class OptimizerWithMixedPrecison(object):
+class OptimizerWithMixedPrecision(object):
     """
     Optimizer with mixed-precision (MP) training. This is a wrapper of a common
-    optimizer, plus the support of mixed-precision pretraining. The object
+    optimizer, plus the support of mixed-precision pre-training. The object
     of this class almost has the same behavior as the common optimizer, with the
     methods `minimize()`, `backward()`, `apply_gradients()` implemented.
     Additionally, it enables the MP training automatically, i.e, the creation
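The class described by this docstring is not meant to be constructed by hand; `decorate()` (exported via `__all__` above) wraps a plain optimizer in it. A minimal usage sketch, assuming the `paddle.fluid.contrib.mixed_precision` package layout implied by the relative import in the hunk header; the network-building code and the `avg_loss` Variable are hypothetical:

    import paddle.fluid as fluid
    from paddle.fluid.contrib.mixed_precision import decorate

    # Build a network elsewhere and obtain a loss Variable, e.g. avg_loss.
    sgd = fluid.optimizer.SGD(learning_rate=0.01)

    # decorate() returns an OptimizerWithMixedPrecision that keeps the usual
    # minimize()/backward()/apply_gradients() interface but scales the loss
    # and runs the FP16 training path underneath.
    mp_sgd = decorate(sgd, init_loss_scaling=128.0)
    mp_sgd.minimize(avg_loss)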
@@ -116,7 +116,7 @@ class OptimizerWithMixedPrecison(object):
                  no_grad_set=None,
                  callbacks=None):
         """
-        Backward propogation or auto differentiation for gradients' computation.
+        Backward propagation or auto differentiation for gradients' computation.
 
         Args:
             loss (Variable): The loss Variable to minimize.
@@ -124,7 +124,7 @@ class OptimizerWithMixedPrecison(object):
                                             parameters in `parameter_list`.
             parameter_list (list|None): A list of Variables to update.
             no_grad_set (set|None): A set of Variables should be ignored.
-            callbacks (list|None): A list of callables to run when appending
+            callbacks (list|None): A list of callable objects to run when appending
                                    backward operator for one parameter.
 
         Returns:
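For reference, a hedged sketch of calling `backward()` directly with the arguments documented above; `mp_sgd` and `avg_loss` are the hypothetical names from the earlier sketch, and the return value is left opaque here because the `Returns:` section is not part of this hunk:

    # minimize() normally calls backward() internally; shown here only to
    # illustrate the documented arguments.
    result = mp_sgd.backward(
        loss=avg_loss,         # loss Variable to differentiate
        startup_program=None,  # default startup program
        parameter_list=None,   # None -> all trainable parameters
        no_grad_set=None,      # Variables excluded from gradient computation
        callbacks=None)        # callables run when appending each backward op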
@@ -136,6 +136,8 @@ class OptimizerWithMixedPrecison(object):
         self._params_grads = self._optimizer.backward(
             self._scaled_loss, startup_program, parameter_list, no_grad_set,
             callbacks)
+        # Change the op_role_var attr for some ops, so that gradients
+        # transferred across GPUs can be FP16.
         update_role_var_grad(self._train_program, self._params_grads)
         scaled_params_grads = []
         for p, g in self._params_grads:
@@ -257,7 +259,7 @@ def decorate(optimizer,
     """
     if amp_lists is None:
         amp_lists = AutoMixedPrecisionLists()
-    mp_optimizer = OptimizerWithMixedPrecison(
+    mp_optimizer = OptimizerWithMixedPrecision(
         optimizer, amp_lists, init_loss_scaling, use_dynamic_loss_scaling,
         incr_every_n_steps, decr_every_n_nan_or_inf, incr_ratio, decr_ratio)
 
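To close, a hedged example of calling `decorate()` with the loss-scaling knobs this hunk forwards to the constructor. It assumes `decorate()` exposes them as keyword parameters (the hunk only shows its first positional parameter), and the numeric values are illustrative, not the function's defaults:

    mp_optimizer = decorate(
        optimizer,
        amp_lists=None,               # falls back to AutoMixedPrecisionLists()
        init_loss_scaling=2.0 ** 15,  # illustrative starting loss scale
        use_dynamic_loss_scaling=True,
        incr_every_n_steps=1000,      # grow the scale after this many clean steps
        decr_every_n_nan_or_inf=2,    # shrink it after this many inf/nan steps
        incr_ratio=2.0,
        decr_ratio=0.5)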