@@ -1290,6 +1290,45 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
def softmax(input, param_attr=None, bias_attr=None, use_cudnn=True, name=None):
"""
The input of the softmax layer is a 2-D tensor with shape N x K (N is the
batch_size, K is the dimension of input feature). The output tensor has the
same shape as the input tensor.

For each row of the input tensor, the softmax operator squashes the
K-dimensional vector of arbitrary real values to a K-dimensional vector of real
values in the range [0, 1] that add up to 1.

It computes the exponential of the given dimension and the sum of the
exponential values of all dimensions in the K-dimensional input vector. The
ratio of the exponential of the given dimension to that sum is the output of
the softmax operator.

For each row :math:`i` and each column :math:`j` in the input tensor X, we have:

.. math::

    Out[i, j] = \\frac{\\exp(X[i, j])}{\\sum_j \\exp(X[i, j])}
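
The formula can be sanity-checked with a few lines of NumPy (an illustrative
sketch, independent of the actual softmax kernel):

.. code-block:: python

    import numpy as np

    x = np.array([[1.0, 2.0, 3.0]])  # one row, K = 3
    # exponentiate, then normalize each row so that it sums to 1
    out = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
    # out is approximately [[0.090, 0.245, 0.665]]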

Args:
    input (Variable): The input variable.
    bias_attr (ParamAttr): The parameter attribute for the bias.
    param_attr (ParamAttr): The parameter attribute for the weights.
    use_cudnn (bool): Whether to use the cuDNN kernel; it is only valid when
        the cuDNN library is installed.
    name (str): The name of this layer. It is optional.

Returns:
    Variable: The output of the softmax layer, with the same shape as the input.

Examples:

    .. code-block:: python

        fc = fluid.layers.fc(input=x, size=10)
        softmax = fluid.layers.softmax(input=fc)
"""
helper = LayerHelper('softmax', **locals())
dtype = helper.input_dtype()
softmax_out = helper.create_tmp_variable(dtype)
@@ -1951,27 +1990,57 @@ def batch_norm(input,
moving_variance_name=None,
do_model_average_for_mean_and_var=False):
"""
**Batch Normalization Layer**

This layer implements Batch Normalization using the configuration given by the
input parameters. It can be used as a normalizer function for conv2d and
fully_connected operations.
The required data format for this layer is one of the following:

1. NHWC `[batch, in_height, in_width, in_channels]`

2. NCHW `[batch, in_channels, in_height, in_width]`

Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
Internal Covariate Shift <https://arxiv.org/pdf/1502.03167.pdf>`_
for more details.

:math:`input` is the set of input features over a mini-batch.

.. math::

    \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\ mini-batch\\ mean \\\\
    \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} (x_i - \\mu_{\\beta})^2 \\qquad &//\\ mini-batch\\ variance \\\\
    \\hat{x_i} &\\gets \\frac{x_i - \\mu_{\\beta}}{\\sqrt{\\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\\ normalize \\\\
    y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\\ scale\\ and\\ shift
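
The four steps above can be traced with plain NumPy for a single feature
(a minimal sketch, not the actual kernel):

.. code-block:: python

    import numpy as np

    x = np.array([1.0, 2.0, 3.0, 4.0])     # mini-batch values of one feature
    gamma, beta, eps = 1.0, 0.0, 1e-05
    mu = x.mean()                           # mini-batch mean
    var = ((x - mu) ** 2).mean()            # mini-batch variance
    x_hat = (x - mu) / np.sqrt(var + eps)   # normalize
    y = gamma * x_hat + beta                # scale and shift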

Args:
    input (Variable): The input variable, which is a LoDTensor.
    act (string, Default None): Activation type, linear|relu|prelu|...
    is_test (bool, Default False): Whether to run batch_norm in test (inference)
        mode rather than training mode.
    momentum (float, Default 0.9): The momentum for the moving average of the
        mini-batch mean and variance.
    epsilon (float, Default 1e-05): A small value added to the variance to
        avoid division by zero.
    param_attr (ParamAttr|None): The parameter attribute for Parameter `scale`.
    bias_attr (ParamAttr|None): The parameter attribute for Parameter `bias`.
    data_layout (string, Default NCHW): The data layout of the input, NCHW or NHWC.
    in_place (bool, Default False): If True, make the input and output of batch
        norm reuse memory instead of creating a temporary variable.
    use_mkldnn (bool, Default False): ${use_mkldnn_comment}
    name (string, Default None): A name for this layer (optional). If set None,
        the layer will be named automatically.
    moving_mean_name (string, Default None): The name of the moving_mean
        variable which stores the global mean.
    moving_variance_name (string, Default None): The name of the moving_variance
        variable which stores the global variance.
    do_model_average_for_mean_and_var (bool, Default False): Whether to do model
        average for mean and variance.

Returns:
    Variable: A tensor variable which is the result after applying batch
        normalization on the input.

Examples:

    .. code-block:: python

        hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
        hidden2 = fluid.layers.batch_norm(input=hidden1)
"""
helper = LayerHelper('batch_norm', **locals())
dtype = helper.input_dtype()
@@ -4599,12 +4668,13 @@ def image_resize(input,
name=None,
resample='BILINEAR'):
"""
**Resize a Batch of Images**

The input must be a tensor of the shape (num_batches, channels, in_h, in_w),
and the resizing only applies to the last two dimensions (height and width).

Supported resample methods:

    'BILINEAR' : Bilinear interpolation
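
As a rough illustration of what 'BILINEAR' means, the sketch below interpolates
a single point inside a 2 x 2 neighbourhood with plain NumPy (values chosen only
for exposition; the actual kernel applies this per output pixel):

.. code-block:: python

    import numpy as np

    corners = np.array([[10.0, 20.0],   # values at (0, 0) and (0, 1)
                        [30.0, 40.0]])  # values at (1, 0) and (1, 1)
    dy, dx = 0.25, 0.5                  # fractional offsets inside the cell
    top    = (1 - dx) * corners[0, 0] + dx * corners[0, 1]
    bottom = (1 - dx) * corners[1, 0] + dx * corners[1, 1]
    value  = (1 - dy) * top + dy * bottom   # interpolate between the two rows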

Args:
@@ -4624,8 +4694,8 @@ def image_resize(input,
Default: 'BILINEAR'

Returns:
    Variable: The output is a 4-D tensor of the shape
    (num_batches, channels, out_h, out_w).

Examples:

    .. code-block:: python
@@ -4709,8 +4779,8 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'):
resample (str): resample method, default: BILINEAR.

Returns:
    Variable: The output is a 4-D tensor of the shape
    (num_batches, channels, out_h, out_w).
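
Examples:
    A minimal usage sketch; `data` is assumed to be a 4-D image tensor of
    shape (num_batches, channels, in_h, in_w) defined elsewhere.

    .. code-block:: python

        out = fluid.layers.image_resize_short(input=data, out_short_len=256)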
"""
in_shape = input.shape
if len(in_shape) != 4:
@@ -4729,6 +4799,8 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'):
def gather(input, index):
"""
**Gather Layer**

Output is obtained by gathering entries of the outer-most dimension
of X indexed by `index` and concatenating them together.
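
For example (an illustrative sketch; `x` and `index` are assumed to be
variables holding the values shown in the comments):

.. code-block:: python

    # x     = [[1, 2], [3, 4], [5, 6]]
    # index = [1, 2]
    output = fluid.layers.gather(x, index)
    # output = [[3, 4], [5, 6]]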