@@ -21,35 +21,14 @@ from . import core
__all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer']


def append_regularization_ops(parameters_and_grads, regularization=None):
def _create_regularization_of_grad(param, grad, regularization=None):
    """ Create and add backward regularization Operators

    Creates and adds backward regularization operators in the BlockDesc.
    This will add gradients of the regularizer function to the gradients
    of the parameters and return these modified gradients. This is the
    same as implementing weight decay in optimizers for regularization.

    Args:
        parameters_and_grads: A list of (parameters, gradients) pairs
            that need to be regularized.
        regularization: A global regularizer. If the parameter's regularizer
            is not set, this one will be applied.

    Returns:
        list[(Variable, Variable)]: list of (parameters, gradients) \
        pairs with the regularized gradients

    Raises:
        Exception: Unknown regularization type
    Function helper of append_regularization_ops.
    """
    params_and_grads = []
    for param, grad in parameters_and_grads:
        # If no gradient then we don't need to do anything
        if grad is None:
            params_and_grads.append((param, grad))
            continue
        with param.block.program._optimized_guard(
                [param, grad]), framework.name_scope('regularization'):
    # If no gradient or no regularization is specified, then we don't need to do anything
    if grad is None or (param.regularizer is None and regularization is None):
        return grad
    regularization_term = None
    if param.regularizer is not None:
        # Add variable for regularization term in grad block
@@ -57,10 +36,7 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
    elif regularization is not None:
        regularization_term = regularization(param, grad, grad.block)

            # If no regularization specified, then we don't need to do anything
            if regularization_term is None:
                params_and_grads.append((param, grad))
                continue
    assert regularization_term is not None

    new_grad = grad
    if grad.type == core.VarDesc.VarType.SELECTED_ROWS:
@@ -82,8 +58,43 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
    else:
        grad.block.append_op(type='sum', inputs=inputs, outputs=outputs)

            params_and_grads.append((param, new_grad))
    return new_grad


def append_regularization_ops(parameters_and_grads, regularization=None):
    """Create and add backward regularization Operators

    Creates and adds backward regularization operators in the BlockDesc.
    This will add gradients of the regularizer function to the gradients
    of the parameters and return these modified gradients. This is the
    same as implementing weight decay in optimizers for regularization.

    Args:
        parameters_and_grads: A list of (parameters, gradients) pairs
            that need to be regularized.
        regularization: A global regularizer. If the parameter's regularizer
            is not set, this one will be applied.

    Returns:
        list[(Variable, Variable)]: list of (parameters, gradients) \
        pairs with the regularized gradients

    Raises:
        Exception: Unknown regularization type
    """
    params_and_grads = []
    if in_dygraph_mode():
        for param, grad in parameters_and_grads:
            new_grad = _create_regularization_of_grad(param, grad,
                                                      regularization)
            params_and_grads.append((param, new_grad))
    else:
        with framework.name_scope('regularization'):
            for param, grad in parameters_and_grads:
                with param.block.program._optimized_guard([param, grad]):
                    new_grad = _create_regularization_of_grad(param, grad,
                                                              regularization)
                    params_and_grads.append((param, new_grad))
    return params_and_grads
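Not part of the diff above, but for reviewers who want the entry point in context: a rough usage sketch of how the refactored append_regularization_ops can be driven under the static graph, roughly along the lines of what an optimizer's apply_gradients step does. The toy network (x, y, pred, loss) and the 1e-4 decay coefficient are illustrative and not taken from the patch.

import paddle.fluid as fluid
from paddle.fluid.backward import append_backward
from paddle.fluid.regularizer import L2Decay, append_regularization_ops

# Build a tiny static-graph program whose parameters we want to decay.
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

# append_backward returns the (parameter, gradient) pairs the function expects.
params_grads = append_backward(loss)

# Apply a global L2 weight decay; a parameter whose own `regularizer`
# attribute is set keeps that regularizer instead of the global one.
params_grads = append_regularization_ops(params_grads, L2Decay(1e-4))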