@@ -24,6 +24,7 @@ from . import core
+from .dygraph.base import _not_support
 
 __all__ = [
     'set_gradient_clip',
     'ErrorClipByValue',
     'GradientClipByValue',
     'GradientClipByNorm',
@@ -343,12 +344,59 @@ def set_gradient_clip(clip, param_list=None, program=None):
 
     Args:
         clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
+                for example :ref:`api_fluid_clip_GradientClipByGlobalNorm` ,
                 which describes the type and detailed attributes of required gradient clip.
-        param_list(list(Variable)): Parameters that require gradient clip.
+        param_list(list(Variable), optional): Parameters that require gradient clip.
                 It can be a list of parameter or a list of parameter's name.
-                When it's None, all parameters in the program will be included.
-        program(Program): The program where parameters are.
-                Will be the default main program when assigned with None.
+                Default None, meaning that all parameters in the program will be included.
+        program(Program, optional): The program where parameters are located.
+                Default None, meaning that using :ref:`api_fluid_default_main_program` .
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+
+            def network():
+                image = fluid.layers.data(name='image', shape=[28], dtype='float32')
+                param_attr1 = fluid.ParamAttr("fc1_param")
+                fc1 = fluid.layers.fc(image, size=10, param_attr=param_attr1)
+                param_attr2 = fluid.ParamAttr("fc2_param")
+                fc2 = fluid.layers.fc(fc1, size=10, param_attr=param_attr2)
+                loss = fluid.layers.reduce_mean(fc2)
+                return loss
+
+            # network 1: clip all parameter gradient
+            with fluid.program_guard(fluid.Program(), fluid.Program()):
+                loss = network()
+                fluid.clip.set_gradient_clip(
+                    fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
+                sgd = fluid.optimizer.SGD(learning_rate=1e-3)
+                sgd.minimize(loss)
+
+            # network 2: clip parameter gradient by name
+            with fluid.program_guard(fluid.Program(), fluid.Program()):
+                loss = network()
+                fluid.clip.set_gradient_clip(
+                    fluid.clip.GradientClipByValue(min=-1.0, max=1.0),
+                    param_list=["fc1_param", "fc2_param"])
+                sgd = fluid.optimizer.SGD(learning_rate=1e-3)
+                sgd.minimize(loss)
+
+            # network 3: clip parameter gradient by var
+            with fluid.program_guard(fluid.Program(), fluid.Program()):
+                loss = network()
+                param_var1 = fluid.default_main_program().global_block().var("fc1_param")
+                param_var2 = fluid.default_main_program().global_block().var("fc2_param")
+                fluid.clip.set_gradient_clip(
+                    fluid.clip.GradientClipByValue(min=-1.0, max=1.0),
+                    param_list=[param_var1, param_var2])
+                sgd = fluid.optimizer.SGD(learning_rate=1e-3)
+                sgd.minimize(loss)
+
"""
|
|
|
|
|
if not isinstance(clip, BaseGradientClipAttr):
|
|
|
|
|
raise TypeError(