@@ -56,7 +56,7 @@ def img_conv_group(input,
                    conv_act=None,
                    param_attr=None,
                    conv_with_batchnorm=False,
-                   conv_batchnorm_drop_rate=None,
+                   conv_batchnorm_drop_rate=0.0,
                    pool_stride=1,
                    pool_type=None,
                    use_cudnn=True):
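As a sanity check on the new default, here is a minimal usage sketch. It assumes the `paddle.fluid` API of this branch; the input name, shape, and filter counts are illustrative placeholders. With a float default, `0.0` plainly means "no dropout after batch norm", so callers no longer face a `None` sentinel.

import paddle.fluid as fluid

# Illustrative NCHW input; the name and shape are placeholders.
img = fluid.layers.data(name='image', shape=[3, 48, 48], dtype='float32')

conv_pool = fluid.nets.img_conv_group(
    input=img,
    conv_num_filter=[64, 64],
    pool_size=2,
    conv_with_batchnorm=True,
    conv_batchnorm_drop_rate=0.0)  # float default: 0.0 disables dropout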
@@ -127,21 +127,21 @@ def sequence_conv_pool(input,


def glu(input, dim=-1):
    """
    The gated linear unit is composed of split, sigmoid activation and
    element-wise multiplication. Specifically, it splits the input into two
    equal-sized parts, :math:`a` and :math:`b`, along the given dimension and
    then computes the following:

    .. math::

        {GLU}(a, b) = a \otimes \sigma(b)

    Refer to `Language Modeling with Gated Convolutional Networks
    <https://arxiv.org/pdf/1612.08083.pdf>`_.

    Args:
        input (Variable): The input variable which is a Tensor or LoDTensor.
        dim (int): The dimension along which to split. If :math:`dim < 0`, the
            dimension to split along is :math:`rank(input) + dim`.

    Returns:
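To make the split-and-gate computation above concrete, here is a minimal NumPy reference sketch of the same formula. It mirrors the docstring's math only, not the Fluid implementation (which, per the docstring, composes split, sigmoid, and element-wise multiplication as graph ops).

import numpy as np

def glu_ref(x, dim=-1):
    # Split x into two equal halves a, b along `dim`, then gate:
    # GLU(a, b) = a * sigmoid(b)
    a, b = np.split(x, 2, axis=dim)
    return a * (1.0 / (1.0 + np.exp(-b)))

x = np.random.rand(3, 6, 9).astype('float32')
out = glu_ref(x, dim=1)  # output shape: (3, 3, 9), half the size along dim 1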
@@ -164,24 +164,24 @@ def dot_product_attention(querys, keys, values):
    """
    The dot-product attention.

    The attention mechanism can be seen as mapping a query and a set of
    key-value pairs to an output. The output is computed as a weighted sum of
    the values, where the weight assigned to each value is computed by a
    compatibility function (the dot product here) of the query with the
    corresponding key.

    The dot-product attention can be implemented through (batch) matrix
    multiplication as follows:

    .. math::

        Attention(Q, K, V) = softmax(QK^\mathrm{T})V

    Refer to `Attention Is All You Need
    <https://arxiv.org/pdf/1706.03762.pdf>`_.

    Note that batch data containing sequences of different lengths is not
    supported by this operator because of the (batch) matrix multiplication.

    Args:
        query (Variable): The input variable which is a Tensor or LoDTensor.
        key (Variable): The input variable which is a Tensor or LoDTensor.
        value (Variable): The input variable which is a Tensor or LoDTensor.
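The batched-matmul formulation above can likewise be checked against a small NumPy sketch. It follows the docstring's formula exactly, so there is no :math:`1/\sqrt{d_k}` scaling factor, and, as the docstring warns, it assumes all sequences in a batch have the same length.

import numpy as np

def dot_product_attention_ref(q, k, v):
    # q: [batch, len_q, d_k]; k: [batch, len_k, d_k]; v: [batch, len_k, d_v]
    scores = np.matmul(q, k.transpose(0, 2, 1))     # [batch, len_q, len_k]
    scores -= scores.max(axis=-1, keepdims=True)    # for numerical stability
    weights = np.exp(scores)
    weights /= weights.sum(axis=-1, keepdims=True)  # row-wise softmax
    return np.matmul(weights, v)                    # [batch, len_q, d_v]

q = np.random.rand(2, 4, 8).astype('float32')
k = np.random.rand(2, 5, 8).astype('float32')
v = np.random.rand(2, 5, 16).astype('float32')
out = dot_product_attention_ref(q, k, v)  # shape: (2, 4, 16)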