Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into update-api-reference-1

wangkuiyi-patch-1
qiaolongfei 7 years ago
commit b77c886ed4

@@ -443,7 +443,7 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker {
   AddComment(R"DOC(
 Swish Activation Operator.
-$$out = \frac{x}{1 + e^{- \beta x}}$$
+$$out = \\frac{x}{1 + e^{- \beta x}}$$
 )DOC");
   }
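The formula above is easy to sanity-check outside the operator. A minimal NumPy sketch (illustrative only; the sample values and the default `beta` are assumptions, not taken from this diff):

.. code-block:: python

    import numpy as np

    def swish(x, beta=1.0):
        # out = x / (1 + exp(-beta * x)), the formula documented above
        return x / (1.0 + np.exp(-beta * x))

    x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
    print(swish(x))            # smooth, non-monotonic activation values
    print(swish(x, beta=2.0))  # larger beta pushes the curve closer to ReLU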

@@ -54,10 +54,19 @@ be linearly scaled to make the L2 norm of $Out$ equal to $max\_norm$, as
 shown in the following formula:
 $$
-Out = \frac{max\_norm * X}{norm(X)},
+Out = \\frac{max\\_norm * X}{norm(X)},
 $$
 where $norm(X)$ represents the L2 norm of $X$.
+
+Examples:
+    .. code-block:: python
+
+        data = fluid.layers.data(
+            name='data', shape=[2, 4, 6], dtype='float32')
+        reshaped = fluid.layers.clip_by_norm(
+            x=data, max_norm=0.5)
+
 )DOC");
   }
 };
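As a rough illustration of the clipping formula, a standalone NumPy sketch under the docstring's stated semantics (not the operator's actual kernel; the conditional mirrors the "linearly scaled" wording above):

.. code-block:: python

    import numpy as np

    def clip_by_norm(x, max_norm):
        # Out = max_norm * X / norm(X) when the L2 norm of X exceeds max_norm;
        # otherwise X is returned unchanged.
        norm = np.sqrt(np.sum(np.square(x)))
        if norm > max_norm:
            return max_norm * x / norm
        return x

    x = np.array([3.0, 4.0])               # L2 norm = 5.0
    print(clip_by_norm(x, max_norm=0.5))   # rescaled so norm(Out) == 0.5
    print(clip_by_norm(x, max_norm=10.0))  # unchanged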

@@ -204,8 +204,6 @@ void Pool2dOpMaker::Make() {
   // TODO(dzhwinter): need to registered layout transform function
   AddComment(R"DOC(
-Pool2d Operator.
-
 The pooling2d operation calculates the output based on
 the input, pooling_type and ksize, strides, paddings parameters.
 Input(X) and output(Out) are in NCHW format, where N is batch size, C is the
@@ -215,19 +213,28 @@ These two elements represent height and width, respectively.
 The input(X) size and output(Out) size may be different.
 Example:
   Input:
       X shape: $(N, C, H_{in}, W_{in})$
   Output:
       Out shape: $(N, C, H_{out}, W_{out})$
 For ceil_mode = false:
     $$
-    H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
-    W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
+    H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1
+    $$
+    $$
+    W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
     $$
 For ceil_mode = true:
     $$
-    H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1 \\
-    W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1
+    H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1
+    $$
+    $$
+    W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1
     $$
 )DOC");

@@ -35,10 +35,10 @@ class UniformRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
 protected:
  void Apply() override {
    AddComment(R"DOC(
-Uniform random operator
+UniformRandomBatchSizeLike operator.
 This operator initializes a tensor with the same batch_size as the Input tensor
 with random values sampled from a uniform distribution.
 )DOC");
    AddAttr<float>("min",

@@ -1034,6 +1034,37 @@ class Block(object):
 class Program(object):
+    """
+    Python Program. Beneath it is a ProgramDesc, which is used to
+    create the C++ Program. A Program is a self-contained,
+    programming-language-like container. It has at least one Block;
+    when control flow ops such as conditional_block or while_op are
+    included, it will contain nested blocks.
+    Please refer to framework.proto for details.
+
+    Notes: we have default_startup_program and default_main_program
+    by default; the pair shares the parameters.
+    The default_startup_program runs only once to initialize parameters,
+    while default_main_program runs in every mini-batch and adjusts the weights.
+
+    Args:
+        None
+
+    Returns:
+        Python Program
+
+    Examples:
+        .. code-block:: python
+
+            main_program = Program()
+            startup_program = Program()
+            with fluid.program_guard(main_program=main_program, startup_program=startup_program):
+                fluid.layers.data(name="x", shape=[-1, 784], dtype='float32')
+                fluid.layers.data(name="y", shape=[-1, 1], dtype='int32')
+                fluid.layers.fc(name="fc", shape=[10], dtype='float32', act="relu")
+
+    """
+
     def __init__(self):
         self.desc = core.ProgramDesc()
         self.blocks = [Block(self, 0)]
@@ -1099,6 +1130,8 @@ class Program(object):
     def clone(self, for_test=False):
         """Clone the Program object
+
+        Args:
+            for_test(bool): indicate whether to clone for testing.
 
         Set for_test to False when we want to clone the program for training.
         Set for_test to True when we want to clone the program for testing.
@@ -1109,8 +1142,9 @@ class Program(object):
         the is_test attributes in these operators will be set to True for
         testing purposes, otherwise, they remain unchanged.
 
-        Returns(Program):
-            The cloned Program object.
+        Returns:
+            Program: The cloned Program object.
         """
         if for_test:
             p = self.inference_optimize()
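A common pattern with clone(for_test=True) is to derive an evaluation program from the training program before optimizer ops are appended. A hedged sketch (the toy network and SGD optimizer are placeholders, not part of this diff):

.. code-block:: python

    import paddle.fluid as fluid

    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        img = fluid.layers.data(name='img', shape=[784], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        prediction = fluid.layers.fc(input=img, size=10, act='softmax')
        loss = fluid.layers.mean(
            fluid.layers.cross_entropy(input=prediction, label=label))

        # Clone before minimize() so the test program holds only the forward
        # pass, with is_test attributes set as described above.
        test_program = main_program.clone(for_test=True)

        fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)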
@@ -1228,6 +1262,7 @@ class Program(object):
     def copy_param_info_from(self, other):
         """
         Copy the information of parameters from other program.
+
         Args:
             other(Program): Other program
@@ -1246,6 +1281,7 @@ class Program(object):
     def copy_data_info_from(self, other):
         """
         Copy the information of data variables from other program.
+
         Args:
             other(Program): Other program
@@ -1299,6 +1335,7 @@ class Parameter(Variable):
     def to_string(self, throw_on_error, with_details=False):
         """
         To debug string.
+
         Args:
             throw_on_error(bool): raise exception when self is not initialized
                 when throw_on_error is True

@@ -822,17 +822,25 @@ def max_sequence_len(rank_table):
 def lod_tensor_to_array(x, table):
-    """ Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY.
+    """
+    Convert a LoDTensor to a LoDTensorArray.
+
+    This function splits a LoDTensor into a LoDTensorArray according to its LoD
+    information. LoDTensorArray is an alias of C++ std::vector<LoDTensor> in
+    PaddlePaddle. The generated LoDTensorArray of this function can be further read
+    or written by `read_from_array()` and `write_to_array()` operators. However,
+    this function is generally an internal component of PaddlePaddle `DynamicRNN`.
+    Users should not use it directly.
 
     Args:
-        x (Variable|list): The LOD tensor to be converted to a LOD tensor array.
+        x (Variable|list): The LoDTensor to be converted to a LoDTensorArray.
         table (ParamAttr|list): The variable that stores the level of lod
                                 which is ordered by sequence length in
-                                descending order.
+                                descending order. It is generally generated
+                                by the `layers.lod_rank_table()` API.
 
     Returns:
-        Variable: The variable of type array that has been converted from a
-                  tensor.
+        Variable: The LoDTensorArray that has been converted from the input tensor.
 
     Examples:
         .. code-block:: python
@@ -897,8 +905,7 @@ def increment(x, value=1.0, in_place=True):
         in_place (bool): If the increment should be performed in-place.
 
     Returns:
-        Variable: The tensor variable storing the transformation of
-                  element-wise increment of each value in the input.
+        Variable: The elementwise-incremented object.
 
     Examples:
         .. code-block:: python
@@ -940,7 +947,7 @@ def array_write(x, i, array=None):
         Variable: The output LOD_TENSOR_ARRAY where the input tensor is written.
 
     Examples:
-        .. code-block::python
+        .. code-block:: python
 
             tmp = fluid.layers.zeros(shape=[10], dtype='int32')
             i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
@@ -1054,14 +1061,31 @@ def equal(x, y, cond=None, **ignored):
 def array_read(array, i):
-    """This function performs the operation to read the data in as an
+    """
+    This function performs the operation to read the data in as an
     LOD_TENSOR_ARRAY.
+
+    .. code-block:: text
+
+        Given:
+            array = [0.6, 0.1, 0.3, 0.1]
+        And:
+            i = 2
+        Then:
+            output = 0.3
 
     Args:
-        array (Variable|list): The input tensor that will be written to an array.
-        i (Variable|list): The subscript index in tensor array, that points the
-                           place where data will be written to.
+        array (Variable|list): The input tensor that stores data to be read.
+        i (Variable|list): The index of the data to be read from the input array.
 
     Returns:
         Variable: The tensor type variable that has the data written to it.
 
     Examples:
         .. code-block:: python
@@ -1154,6 +1178,13 @@ def array_length(array):
 class ConditionalBlockGuard(BlockGuard):
+    """
+    ConditionalBlockGuard is derived from BlockGuard. It is dedicated to
+    holding a ConditionalBlock and helping users enter and exit the
+    ConditionalBlock via Python's 'with' keyword. However, ConditionalBlockGuard
+    is generally an internal component of IfElse; users should not use it directly.
+    """
+
     def __init__(self, block):
         if not isinstance(block, ConditionalBlock):
             raise TypeError("block should be conditional block")
@@ -1875,26 +1906,26 @@ def reorder_lod_tensor_by_rank(x, rank_table):
 def is_empty(x, cond=None, **ignored):
     """
-    **Is Empty**
-
-    This layer returns the truth value of whether the variable is empty.
+    Test whether a Variable is empty.
 
     Args:
-        x(Variable): Operand of *is_empty*
-        cond(Variable|None): Optional output variable to store the result
-                             of *is_empty*
+        x (Variable): The Variable to be tested.
+        cond (Variable|None): Output parameter. Returns the test result
+                              of given 'x'. Default: None
 
     Returns:
-        Variable: The tensor variable storing the output of *is_empty*.
+        Variable: A bool scalar. True if 'x' is an empty Variable.
 
     Raises:
         TypeError: If input cond is not a variable, or cond's dtype is
-                   not bool
+                   not bool.
 
     Examples:
         .. code-block:: python
 
-            less = fluid.layers.is_empty(x=input)
+            res = fluid.layers.is_empty(x=input)
+            # or:
+            fluid.layers.is_empty(x=input, cond=res)
     """
     helper = LayerHelper("is_empty", **locals())
     if cond is None:

@@ -544,6 +544,41 @@ def shuffle(reader, buffer_size):
 def batch(reader, batch_size):
+    """
+    This layer is a reader decorator. It takes a reader and adds
+    'batching' decoration on it. When reading with the resulting
+    decorated reader, output data will be automatically organized
+    into batches.
+
+    Args:
+        reader(Variable): The reader to be decorated with 'batching'.
+        batch_size(int): The batch size.
+
+    Returns:
+        Variable: The reader which has been decorated with 'batching'.
+
+    Examples:
+        .. code-block:: python
+
+            raw_reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
+                                                               './data2.recordio'],
+                                                    shapes=[(3,224,224), (1)],
+                                                    lod_levels=[0, 0],
+                                                    dtypes=['float32', 'int64'],
+                                                    thread_num=2,
+                                                    buffer_size=2)
+            batch_reader = fluid.layers.batch(reader=raw_reader, batch_size=5)
+
+            # If we read data with the raw_reader:
+            #     data = fluid.layers.read_file(raw_reader)
+            # we can only get data instance by instance.
+            #
+            # However, if we read data with the batch_reader:
+            #     data = fluid.layers.read_file(batch_reader)
+            # every 5 adjacent instances will be automatically combined together
+            # to become a batch. So what we get ('data') is a batch of data instead
+            # of a single instance.
+    """
     return __create_unshared_decorated_reader__(
         'create_batch_reader', reader, {'batch_size': int(batch_size)})
@@ -589,15 +624,41 @@ def parallel(reader):
         {})
 
-def read_file(file_obj):
+def read_file(reader):
+    """
+    Execute the given reader and get data via it.
+
+    A reader is also a Variable. It can be a raw reader generated by
+    `fluid.layers.open_files()` or a decorated one generated by
+    `fluid.layers.double_buffer()` and so on.
+
+    Args:
+        reader(Variable): The reader to execute.
+
+    Returns:
+        Tuple[Variable]: Data read via the given reader.
+
+    Examples:
+        .. code-block:: python
+
+            data_file = fluid.layers.open_files(
+                filenames=['mnist.recordio'],
+                shapes=[(-1, 748), (-1, 1)],
+                lod_levels=[0, 0],
+                dtypes=["float32", "int64"])
+            data_file = fluid.layers.double_buffer(
+                fluid.layers.batch(data_file, batch_size=64))
+            input, label = fluid.layers.read_file(data_file)
+    """
     helper = LayerHelper('read_file')
     out = [
         helper.create_tmp_variable(
             stop_gradient=True, dtype='float32')
-        for _ in range(len(file_obj.desc.shapes()))
+        for _ in range(len(reader.desc.shapes()))
     ]
     helper.append_op(
-        type='read', inputs={'Reader': [file_obj]}, outputs={'Out': out})
+        type='read', inputs={'Reader': [reader]}, outputs={'Out': out})
     if len(out) == 1:
         return out[0]
     else:

@@ -71,21 +71,40 @@ def noam_decay(d_model, warmup_steps):
 def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
-    """Applies exponential decay to the learning rate.
-
-    ```python
-    decayed_learning_rate = learning_rate *
-                            decay_rate ^ (global_step / decay_steps)
-    ```
+    """
+    Applies exponential decay to the learning rate.
+
+    When training a model, it is often recommended to lower the learning rate as the
+    training progresses. By using this function, the learning rate will be decayed by
+    'decay_rate' every 'decay_steps' steps.
+
+    >>> if staircase == True:
+    >>>     decayed_learning_rate = learning_rate * decay_rate ^ floor(global_step / decay_steps)
+    >>> else:
+    >>>     decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
 
     Args:
-        learning_rate: A scalar float32 value or a Variable. This
-                       will be the initial learning rate during training
-        decay_steps: A Python `int32` number.
-        decay_rate: A Python `float` number.
-        staircase: Boolean. If set true, decay the learning rate every decay_steps.
+        learning_rate(Variable|float): The initial learning rate.
+        decay_steps(int): See the decay computation above.
+        decay_rate(float): The decay rate. See the decay computation above.
+        staircase(Boolean): If True, decay the learning rate at discrete intervals.
+                            Default: False
 
     Returns:
-        The decayed learning rate
+        Variable: The decayed learning rate
+
+    Examples:
+        .. code-block:: python
+
+            base_lr = 0.1
+            sgd_optimizer = fluid.optimizer.SGD(
+                learning_rate=fluid.layers.exponential_decay(
+                    learning_rate=base_lr,
+                    decay_steps=10000,
+                    decay_rate=0.5,
+                    staircase=True))
+            sgd_optimizer.minimize(avg_cost)
     """
     global_step = _decay_step_counter()
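The staircase option is just a floor on the exponent; the arithmetic can be checked with a standalone sketch (plain Python, not the fluid implementation, which emits these computations as ops in the program):

.. code-block:: python

    def decayed_lr(learning_rate, global_step, decay_steps, decay_rate,
                   staircase=False):
        # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps),
        # with the exponent floored when staircase is True.
        exponent = global_step / float(decay_steps)
        if staircase:
            exponent = float(int(exponent))
        return learning_rate * decay_rate ** exponent

    for step in (0, 5000, 10000, 15000):
        print(step,
              round(decayed_lr(0.1, step, 10000, 0.5, staircase=True), 6),
              round(decayed_lr(0.1, step, 10000, 0.5, staircase=False), 6))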
@@ -129,22 +148,39 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
 def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
-    """Applies inverse time decay to the initial learning rate.
+    """
+    Applies inverse time decay to the initial learning rate.
+
+    When training a model, it is often recommended to lower the learning rate as the
+    training progresses. By using this function, an inverse decay function will be
+    applied to the initial learning rate.
 
-    >>> if staircase:
+    >>> if staircase == True:
     >>>     decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
     >>> else:
     >>>     decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
 
     Args:
-        learning_rate: A scalar float32 value or a Variable. This
-                       will be the initial learning rate during training.
-        decay_steps: A Python `int32` number.
-        decay_rate: A Python `float` number.
-        staircase: Boolean. If set true, decay the learning rate every decay_steps.
+        learning_rate(Variable|float): The initial learning rate.
+        decay_steps(int): See the decay computation above.
+        decay_rate(float): The decay rate. See the decay computation above.
+        staircase(Boolean): If True, decay the learning rate at discrete intervals.
+                            Default: False
 
     Returns:
-        The decayed learning rate
+        Variable: The decayed learning rate
+
+    Examples:
+        .. code-block:: python
+
+            base_lr = 0.1
+            sgd_optimizer = fluid.optimizer.SGD(
+                learning_rate=fluid.layers.inverse_time_decay(
+                    learning_rate=base_lr,
+                    decay_steps=10000,
+                    decay_rate=0.5,
+                    staircase=True))
+            sgd_optimizer.minimize(avg_cost)
     """
     global_step = _decay_step_counter()

@@ -27,8 +27,32 @@ __all__ = ['accuracy', 'auc']
 def accuracy(input, label, k=1, correct=None, total=None):
     """
+    accuracy layer.
+    Refer to https://en.wikipedia.org/wiki/Precision_and_recall
+
     This function computes the accuracy using the input and label.
-    The output is the top k inputs and their indices.
+    If the correct label occurs in the top k predictions, then correct will increment by one.
+    Note: the dtype of accuracy is determined by input. The input and label dtype can be different.
+
+    Args:
+        input(Variable): The input of the accuracy layer, which is the predictions of the network.
+            Carrying LoD information is supported.
+        label(Variable): The label of the dataset.
+        k(int): The top k predictions for each class will be checked.
+        correct(Variable): The correct predictions count.
+        total(Variable): The total entries count.
+
+    Returns:
+        Variable: The correct rate.
+
+    Examples:
+        .. code-block:: python
+
+            data = fluid.layers.data(name="data", shape=[-1, 32, 32], dtype="float32")
+            label = fluid.layers.data(name="label", shape=[-1, 1], dtype="int32")
+            predict = fluid.layers.fc(input=data, size=10)
+            acc = fluid.layers.accuracy(input=predict, label=label, k=5)
     """
     helper = LayerHelper("accuracy", **locals())
     topk_out, topk_indices = nn.topk(input, k=k)
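The top-k rule described above (correct increments when the label appears in the top k predictions) can be expressed compactly in NumPy; a minimal sketch with made-up scores, independent of the fluid operator:

.. code-block:: python

    import numpy as np

    def topk_accuracy(scores, labels, k=1):
        # A sample counts as correct when its true label is among the
        # k highest-scoring classes for that sample.
        topk = np.argsort(-scores, axis=1)[:, :k]
        correct = sum(int(label in row) for row, label in zip(topk, labels))
        return correct / float(len(labels))

    scores = np.array([[0.1, 0.7, 0.2],
                       [0.5, 0.2, 0.3]])
    labels = np.array([2, 0])
    print(topk_accuracy(scores, labels, k=1))  # 0.5
    print(topk_accuracy(scores, labels, k=2))  # 1.0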

(File diff suppressed because it is too large.)

@@ -17,7 +17,6 @@ __activations__ = [
     'sigmoid',
     'logsigmoid',
     'exp',
-    'relu',
     'tanh',
     'tanh_shrink',
     'softshrink',
@@ -29,7 +28,6 @@ __activations__ = [
     'sin',
     'round',
     'reciprocal',
-    'log',
     'square',
     'softplus',
     'softsign',

@@ -108,16 +108,29 @@ def create_global_var(shape,
                       force_cpu=False,
                       name=None):
     """
-    Create a global variable. such as global_step
+    Create a new variable in the global block (block 0).
 
     Args:
         shape(list[int]): shape of the variable
-        value(float): the value of the variable
-        dtype(string): element type of the parameter
-        persistable(bool): if this variable is persistable
-        force_cpu(bool): force this variable to be on CPU
+        value(float): the value of the variable. The newly created
+                      variable will be filled with it.
+        dtype(string): data type of the variable
+        persistable(bool): if this variable is persistable.
+                           Default: False
+        force_cpu(bool): force this variable to be on CPU.
+                         Default: False
+        name(str|None): The name of the variable. If set to None the variable
+                        name will be generated automatically.
+                        Default: None
 
     Returns:
         Variable: the created Variable
+
+    Examples:
+        .. code-block:: python
+
+            var = fluid.create_global_var(shape=[2,3], value=1.0, dtype='float32',
+                                          persistable=True, force_cpu=True, name='new_var')
     """
     helper = LayerHelper("global_var", **locals())
     var = helper.create_global_variable(
@@ -175,7 +188,8 @@ def concat(input, axis=0, name=None):
     Examples:
         .. code-block:: python
+
            out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth])
     """
     helper = LayerHelper('concat', **locals())
     out = helper.create_tmp_variable(dtype=helper.input_dtype())
@@ -188,19 +202,21 @@ def concat(input, axis=0, name=None):
 def sums(input, out=None):
-    """This function performs the sum operation on the input and returns the
+    """
+    This function performs the sum operation on the input and returns the
     result as the output.
 
     Args:
         input (Variable|list): The input tensor that has the elements
                                that need to be summed up.
+        out (Variable|None): Output parameter. The sum result.
+                             Default: None
 
     Returns:
-        Variable: The tensor type variable that has the sum of input
-                  written to it.
+        Variable: the sum of the input. The same as the argument 'out'.
 
     Examples:
-        .. code-block::python
+        .. code-block:: python
 
             tmp = fluid.layers.zeros(shape=[10], dtype='int32')
             i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
@@ -371,13 +387,13 @@ def argmin(x, axis=0):
         x(Variable): The input to compute the indices of
                      the min elements.
         axis(int): Axis to compute indices along.
 
     Returns:
         Variable: The tensor variable storing the output
 
     Examples:
         .. code-block:: python
 
             out = fluid.layers.argmin(x=in, axis=0)
             out = fluid.layers.argmin(x=in, axis=-1)
     """
@@ -402,13 +418,13 @@ def argmax(x, axis=0):
         x(Variable): The input to compute the indices of
                      the max elements.
         axis(int): Axis to compute indices along.
 
     Returns:
         Variable: The tensor variable storing the output
 
     Examples:
         .. code-block:: python
 
             out = fluid.layers.argmax(x=in, axis=0)
             out = fluid.layers.argmax(x=in, axis=-1)
     """
@@ -456,11 +472,12 @@ def zeros(shape, dtype, force_cpu=False):
     It also sets *stop_gradient* to True.
 
     Args:
-        shape(tuple|list|None): Shape of output tensor
-        dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor
+        shape(tuple|list|None): Shape of output tensor.
+        dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor.
+        force_cpu(bool, default False): Whether to make output stay on CPU.
 
     Returns:
-        Variable: The tensor variable storing the output
+        Variable: The tensor variable storing the output.
 
     Examples:
         .. code-block:: python
