parent be00b0c4d6
commit 8623e48ba8
@@ -0,0 +1,99 @@
import paddle.v2.framework.framework as framework

__all__ = ['append_regularization_ops', 'L2DecayRegularizer']


def append_regularization_ops(parameters_and_grads):
    """Create and add backward regularization Operators

    Creates and adds backward regularization operators in the BlockDesc.
    This adds the gradients of the regularizer function to the gradients
    of the parameters and returns these modified gradients. This is the
    same as implementing weight decay in optimizers for regularization.

    Args:
        parameters_and_grads: A list of (parameter, gradient) pairs
            that need to be regularized.

    Returns:
        A list of (parameter, gradient) pairs with the regularized gradients.

    Raises:
        Exception: Unknown regularization type
    """
    params_and_grads = []
    for param, grad in parameters_and_grads:
        # If no gradient or no regularizer was specified,
        # then we don't need to do anything.
        if grad is None or param.regularizer is None:
            params_and_grads.append((param, grad))
            continue

        # Add a variable for the regularization term in the gradient's block.
        regularization_term = param.regularizer(param, grad.block)
        assert grad.shape == regularization_term.shape

        # Fold the regularization term into the gradient in place.
        grad.block.append_op(
            type='elementwise_add',
            inputs={"X": grad,
                    "Y": regularization_term},
            outputs={"Out": grad})
        params_and_grads.append((param, grad))

    return params_and_grads
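
# A worked example of the arithmetic the appended ops carry out (a sketch with
# illustrative numbers): for a parameter with an L2DecayRegularizer, its
# __call__ appends a 'scale' op and the loop above appends an
# 'elementwise_add' op, so the gradient handed back is
#
#     new_grad = grad + regularization_coeff * param
#
# e.g. grad = 0.1, param = 2.0, regularization_coeff = 0.5 gives
# 0.1 + 0.5 * 2.0 = 1.1, the usual weight-decay adjustment.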


class WeightDecayRegularizer(object):
    """Base class for weight decay regularizers

    Defines the common interface of weight-decay regularizers.
    Weight-decay regularizers are added only during the backward
    pass for faster regularization. They add operations to the network
    that correspond to the gradient of the regularization function.
    Users should not use this class directly; they should use one
    of its implementations instead.
    """

    def __init__(self):
        pass

    def __call__(self, param, block):
        """Add the corresponding weight decay operations to the network
        """
        raise NotImplementedError()


class L2DecayRegularizer(WeightDecayRegularizer):
    """Implements the L2 Weight Decay Regularization
    """

    def __init__(self, regularization_coeff=0.0):
        assert regularization_coeff is not None
        super(L2DecayRegularizer, self).__init__()
        self._regularization_coeff = regularization_coeff

    def __call__(self, param, block):
        """Add L2 weight decay ops to the network

        Adds L2 weight decay ops:
        L2WeightDecay = reg_coeff * parameter

        Args:
            param: parameter variable for which regularization is applied
            block: block in which the variable is to be created

        Returns:
            new variable for the weight decay term
        """
        assert isinstance(param, framework.Parameter)
        assert isinstance(block, framework.Block)
        decay = block.create_var(
            dtype="float32", shape=param.shape, lod_level=param.lod_level)
        # Append Op to calculate decay
        block.append_op(
            type='scale',
            inputs={"X": param},
            outputs={"Out": decay},
            attrs={"scale": self._regularization_coeff})

        return decay
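
For context, the intended call pattern is roughly the sketch below. The parameter name w, the coefficient 0.01, and the loss variable are illustrative placeholders; the imports and the optimizer.append_regularization_ops entry point mirror the test that follows.

import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer
import paddle.v2.framework.regularizer as regularizer
from paddle.v2.framework.backward import append_backward_ops

program = framework.Program()
block = program.global_block()

# A parameter opts in to weight decay by carrying a regularizer.
w = block.create_parameter(
    dtype="float32",
    shape=[5, 10],
    lod_level=0,
    name="w",
    regularizer=regularizer.L2DecayRegularizer(regularization_coeff=0.01))

# ... build the forward network and a `loss` variable here ...

# After the backward pass, fold the decay terms into the gradients
# before an optimizer consumes them.
params_grads = append_backward_ops(loss)
params_grads = optimizer.append_regularization_ops(params_grads)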
@@ -0,0 +1,43 @@
import unittest

import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer
import paddle.v2.framework.regularizer as regularizer
from paddle.v2.framework.backward import append_backward_ops


class TestL2DecayRegularizer(unittest.TestCase):
    def test_l2decay_regularizer(self):
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            regularizer=regularizer.L2DecayRegularizer(0.5))
        self.assertTrue(mul_x.regularizer is not None)
        self.assertTrue(
            isinstance(mul_x.regularizer, regularizer.L2DecayRegularizer))
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        params_grads = append_backward_ops(mul_out)
        self.assertEqual(len(params_grads), 1)
        count_ops = len(block.ops)
        params_grads = optimizer.append_regularization_ops(params_grads)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(block.ops), count_ops + 2)
        self.assertEqual(block.ops[-1].type, 'elementwise_add')
        self.assertEqual(block.ops[-2].type, 'scale')


if __name__ == '__main__':
    unittest.main()
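
To run the test locally (the file's path is not shown in this diff, so the module name below is an assumption based on the class it defines):

    python -m unittest test_regularizer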