@@ -21,35 +21,14 @@ from . import core
__all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer']


def append_regularization_ops(parameters_and_grads, regularization=None):
def _create_regularization_of_grad(param, grad, regularization=None):
    """ Create and add backward regularization Operators

    Creates and adds backward regularization operators in the BlockDesc.
    This will add gradients of the regularizer function to the gradients
    of the parameters and return these modified gradients. This is the
    same as implementing weight decay in optimizers for regularization.

    Args:
        parameters_and_grads: A list of (parameters, gradients) pairs
            that need to be regularized.
        regularization: A global regularizer. If the parameter's regularizer
            is not set, this one will be applied.

    Returns:
        list[(Variable, Variable)]: list of (parameters, gradients) \
        pairs with the regularized gradients

    Raises:
        Exception: Unknown regularization type
    Function helper of append_regularization_ops.
    """
    params_and_grads = []
    for param, grad in parameters_and_grads:
        # If no gradient then we don't need to do anything
        if grad is None:
            params_and_grads.append((param, grad))
            continue
        with param.block.program._optimized_guard(
                [param, grad]), framework.name_scope('regularization'):
    # If no gradient or no regularization is specified, then we don't need to do anything
    if grad is None or (param.regularizer is None and regularization is None):
        return grad
    regularization_term = None
    if param.regularizer is not None:
        # Add variable for regularization term in grad block
@@ -57,10 +36,7 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
    elif regularization is not None:
        regularization_term = regularization(param, grad, grad.block)

            # If no regularization specified, then we don't need to do anything
            if regularization_term is None:
                params_and_grads.append((param, grad))
                continue
    assert regularization_term is not None

    new_grad = grad
    if grad.type == core.VarDesc.VarType.SELECTED_ROWS:
@@ -82,8 +58,43 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
    else:
        grad.block.append_op(type='sum', inputs=inputs, outputs=outputs)

            params_and_grads.append((param, new_grad))
    return new_grad


def append_regularization_ops(parameters_and_grads, regularization=None):
    """Create and add backward regularization Operators

    Creates and adds backward regularization operators in the BlockDesc.
    This will add gradients of the regularizer function to the gradients
    of the parameters and return these modified gradients. This is the
    same as implementing weight decay in optimizers for regularization.

    Args:
        parameters_and_grads: A list of (parameters, gradients) pairs
            that need to be regularized.
        regularization: A global regularizer. If the parameter's regularizer
            is not set, this one will be applied.

    Returns:
        list[(Variable, Variable)]: list of (parameters, gradients) \
        pairs with the regularized gradients

    Raises:
        Exception: Unknown regularization type
    """
    params_and_grads = []
    if in_dygraph_mode():
        for param, grad in parameters_and_grads:
            new_grad = _create_regularization_of_grad(param, grad,
                                                      regularization)
            params_and_grads.append((param, new_grad))
    else:
        with framework.name_scope('regularization'):
            for param, grad in parameters_and_grads:
                with param.block.program._optimized_guard([param, grad]):
                    new_grad = _create_regularization_of_grad(param, grad,
                                                              regularization)
                    params_and_grads.append((param, new_grad))
    return params_and_grads
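Not part of the diff above, but for reviewers who want the entry point in context: a rough usage sketch of how the refactored append_regularization_ops can be driven under the static graph, roughly along the lines of what an optimizer's apply_gradients step does. The toy network (x, y, pred, loss) and the 1e-4 decay coefficient are illustrative and not taken from the patch.

import paddle.fluid as fluid
from paddle.fluid.backward import append_backward
from paddle.fluid.regularizer import L2Decay, append_regularization_ops

# Build a tiny static-graph program whose parameters we want to decay.
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

# append_backward returns the (parameter, gradient) pairs the function expects.
params_grads = append_backward(loss)

# Apply a global L2 weight decay; a parameter whose own `regularizer`
# attribute is set keeps that regularizer instead of the global one.
params_grads = append_regularization_ops(params_grads, L2Decay(1e-4))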