parent be00b0c4d6
commit 8623e48ba8
@@ -0,0 +1,99 @@
import paddle.v2.framework.framework as framework

__all__ = ['append_regularization_ops', 'L2DecayRegularizer']


def append_regularization_ops(parameters_and_grads):
    """Create and add backward regularization Operators

    Creates and adds backward regularization operators in the BlockDesc.
    This will add gradients of the regularizer function to the gradients
    of the parameters and return these modified gradients. This is the
    same as implementing weight decay in optimizers for regularization.

    Args:
        parameters_and_grads: A list of (parameters, gradients) pairs
                              that need to be regularized.

    Returns:
        list of (parameters, gradients) pair with the regularized gradient

    Raises:
        Exception: Unknown regularization type
    """
    params_and_grads = []
    for param, grad in parameters_and_grads:
        # If no gradient or no regularization specified,
        # then we don't need to do anything
        if grad is None or param.regularizer is None:
            params_and_grads.append((param, grad))
            continue

        # Add variable for regularization term in grad block
        regularization_term = param.regularizer(param, grad.block)
        assert grad.shape == regularization_term.shape

        grad.block.append_op(
            type='elementwise_add',
            inputs={"X": grad,
                    "Y": regularization_term},
            outputs={"Out": grad})
        params_and_grads.append((param, grad))

    return params_and_grads


class WeightDecayRegularizer(object):
    """Base class for weight decay regularizers

    Defines the common interface of weight-decay regularizers.
    Weight-decay regularizers are added only during the backward
    pass for faster regularization. They add operations to the network
    that correspond to gradient of the regularization function.
    Users should not use this class directly, but need to use one
    of its implementations
    """

    def __init__(self):
        pass

    def __call__(self, param, block):
        """Add corresponding weight decay operations to the network
        """
        raise NotImplementedError()


class L2DecayRegularizer(WeightDecayRegularizer):
    """Implements the L2 Weight Decay Regularization
    """

    def __init__(self, regularization_coeff=0.0):
        assert regularization_coeff is not None
        super(L2DecayRegularizer, self).__init__()
        self._regularization_coeff = regularization_coeff

    def __call__(self, param, block):
        """Add L2 weight decay ops to network

        Adds L2 weight decay ops.
        L2WeightDecay = reg_coeff * parameter

        Args:
            param: parameter variable for which regularization is applied
            block: block in which variable is to be created

        Returns:
            new variable for weight decay
        """
        assert isinstance(param, framework.Parameter)
        assert isinstance(block, framework.Block)
        decay = block.create_var(
            dtype="float32", shape=param.shape, lod_level=param.lod_level)
        # Append Op to calculate decay
        block.append_op(
            type='scale',
            inputs={"X": param},
            outputs={"Out": decay},
            attrs={"scale": self._regularization_coeff})

        return decay
@@ -0,0 +1,43 @@
import unittest

import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer
import paddle.v2.framework.regularizer as regularizer
from paddle.v2.framework.backward import append_backward_ops


class TestL2DecayRegularizer(unittest.TestCase):
    def test_l2decay_regularizer(self):
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            regularizer=regularizer.L2DecayRegularizer(0.5))
        self.assertTrue(mul_x.regularizer is not None)
        self.assertTrue(
            isinstance(mul_x.regularizer, regularizer.L2DecayRegularizer))
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        params_grads = append_backward_ops(mul_out)
        self.assertEqual(len(params_grads), 1)
        count_ops = len(block.ops)
        params_grads = optimizer.append_regularization_ops(params_grads)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(block.ops), count_ops + 2)
        self.assertEqual(block.ops[-1].type, 'elementwise_add')
        self.assertEqual(block.ops[-2].type, 'scale')


if __name__ == '__main__':
    unittest.main()