Merge pull request #11531 from reyoung/feature/non_layer_api_doc

Polish Non-Layer API
7 years ago · b94f7848c4
parent 8d5ab1f9cd d1203e3822
commit b94f7848c4
8 changed files with 545 additions and 93 deletions
--- a/python/paddle/fluid/init.py
+++ b/python/paddle/fluid/init.py
@ -44,7 +44,7 @@ import metrics
 import transpiler
 from param_attr import ParamAttr, WeightNormParamAttr
 from data_feeder import DataFeeder
-from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace
+from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope
 from transpiler import DistributeTranspiler, InferenceTranspiler, \
    memory_optimize, release_memory
 from concurrency import (Go, make_channel, channel_send, channel_recv,
@ -83,6 +83,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \
              'profiler',
              'unique_name',
              'recordio_writer',
+              'Scope',
          ]


--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@ -25,6 +25,13 @@ g_scope = core.Scope()


 def global_scope():
+    """
+    Get the global/default scope instance. There are a lot of APIs use
+    :code:`global_scope` as its default value, e.g., :code:`Executor.run`
+
+    Returns:
+        Scope: The global/default scope instance.
+    """
    return g_scope


@ -37,6 +44,19 @@ def switch_scope(scope):

@contextlib.contextmanager
 def scope_guard(scope):
+    """
+    Change the global/default scope instance by Python `with` statement. All
+    variable in runtime will assigned to the new scope.
+
+    Examples:
+        >>> import paddle.fluid as fluid
+        >>> new_scope = fluid.Scope()
+        >>> with fluid.scope_guard(new_scope):
+        >>>     ...
+
+    Args:
+        scope: The new global/default scope.
+    """
    ex = switch_scope(scope)
    yield
    switch_scope(ex)
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
--- a/python/paddle/fluid/lod_tensor.py
+++ b/python/paddle/fluid/lod_tensor.py
@ -19,33 +19,41 @@ __all__ = ['create_lod_tensor', 'create_random_int_lodtensor']


 def create_lod_tensor(data, lod, place):
-    """Create a lod tensor from a numpy array, a list, or an existing lod tensor.
+    """
+    Create a lod tensor from a numpy array, a list, or an existing lod tensor.

    Create a lod tensor by doing the following:
+
    1. Check that the length-based input lod is valid.
+
    2. Convert the length-based lod to a offset-based LoD.
-    3. Copy the data from a numpy array, a list or a existing lod tensor to 
+
+    3. Copy the data from a numpy array, a list or a existing lod tensor to
       CPU or GPU device (based on input place).
+
    4. Set the level of detail (LoD) using the offset-based LoD.
    
-    Use example:
-    Suppose we want LoDTensor to hold data for sequences of word, where each word is
-    represented by an integer. If we want to create a LoDTensor to represent two 
-    sentences, one of 2 words, and one of 3 words. 
+    Examples:

-    Then 'data' can be a numpy array of integers with shape (5, 1).
-    'lod' will be [[2, 3]], indicating the length(# of words) in each sentence.
-    This length-based input lod [[2, 3]] will be converted to offset-based lod [[0, 2, 5]]
-    inside the function call.
+        Suppose we want LoDTensor to hold data for sequences of word, where each
+        word is represented by an integer. If we want to create a LoDTensor to
+        represent two  sentences, one of 2 words, and one of 3 words.

-    Please refer to 
-    github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/lod_tensor.md
-    for more details regarding LoD.
+        Then :code:`data` can be a numpy array of integers with shape (5, 1).
+        :code:`lod` will be [[2, 3]], indicating the length(# of words) in each
+        sentence. This length-based input lod [[2, 3]] will be converted to
+        offset-based lod [[0, 2, 5]] inside the function call.
+
+    Please reference :ref:`api_guide_low_level_lod_tensor` for more details
+    regarding LoD.

    Args:
-        data: a numpy array or a LoDTensor or a list holding the data to be copied.
-        lod: a list of lists indicating the length-based LoD info specified by the user. 
-        place: CPU or GPU place indicating where the data in the new LoDTensor will be stored.
+        data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a
+            list holding the data to be  copied.
+        lod(list): a list of lists indicating the length-based LoD info
+            specified by the user.
+        place(Place): CPU or GPU place indicating where the data in the new
+            LoDTensor will be stored.

    Returns:
        A fluid LoDTensor object with tensor data and lod info.
@ -77,31 +85,38 @@ def create_lod_tensor(data, lod, place):


 def create_random_int_lodtensor(lod, base_shape, place, low, high):
-    """Create a LoDTensor containing random integers.
+    """
+    Create a LoDTensor containing random integers.

-    This function is frequently used in the book examples. So we revised it based on 
-    the new create_lod_tensor API and put it here in the lod_tensor module to simplify 
-    the code. 
+    This function is frequently used in the book examples. So we revised it
+    based on the new create_lod_tensor API and put it here in the lod_tensor
+    module to simplify the code.

    The function does the following:
-    1. Calculate the overall shape of the LoDTensor based on the length-based 'lod' input 
-    and the shape of the basic element in 'base_shape'.
+
+    1. Calculate the overall shape of the LoDTensor based on the length-based
+       :code:`lod` input and the shape of the basic element in
+       :code:`base_shape`.
+
    2. Create a numpy array of this shape.
+
    3. Create the LoDTensor using create_lod_tensor API.

-    Suppose we want LoDTensor to hold data for sequences of word, where each word is
-    represented by an integer. If we want to create a LoDTensor to represent two 
-    sentences, one of 2 words, and one of 3 words. Then 'base_shape' is [1], input 
-    length-based 'lod' is [[2, 3]]. Then the overall shape of the LoDTensor would be 
-    [5, 1], holding 5 words for two sentences. 
+    Suppose we want LoDTensor to hold data for sequences of word, where each
+    word is represented by an integer. If we want to create a LoDTensor to
+    represent two sentences, one of 2 words, and one of 3 words. Then
+    'base_shape' is [1], input length-based 'lod' is [[2, 3]]. Then the overall
+    shape of the LoDTensor would be [5, 1], holding 5 words for two sentences.

    Args:
-        data: a numpy array or a LoDTensor holding the data to be copied.
-        lod: a list of lists indicating the length-based LoD info specified by the user.
-        base_shape: the shape of the basic element to be held by the LoDTensor. 
-        place: CPU or GPU place indicating where the data in the new LoDTensor will be stored.
-        low: the lower bound of the random integers.
-        high: the upper bound of the random integers.
+        lod(list): a list of lists indicating the length-based LoD info
+            specified by the user.
+        base_shape(list): the shape of the basic element to be held by the
+            LoDTensor.
+        place(Place): CPU or GPU place indicating where the data in the new
+            LoDTensor will be stored.
+        low(int): the lower bound of the random integers.
+        high(int): the upper bound of the random integers.

    Returns:
        A fluid LoDTensor object with tensor data and lod info. 
--- a/python/paddle/fluid/recordio_writer.py
+++ b/python/paddle/fluid/recordio_writer.py
@ -36,6 +36,45 @@ def convert_reader_to_recordio_file(
        compressor=core.RecordIOWriter.Compressor.Snappy,
        max_num_records=1000,
        feed_order=None):
+    """
+    Convert a Python Reader to a recordio file.
+
+    Please see :ref:`api_guide_python_reader` and :ref:`api_guide_reader_op` for
+    details.
+
+    Examples:
+
+        >>> import paddle.fluid as fluid
+        >>> import paddle.dataset.mnist as mnist
+        >>> import paddle
+        >>>
+        >>> tmp_program = fluid.Program()
+        >>> with fluid.program_guard(tmp_program):
+        >>>     img = fluid.layers.data(name='img', shape=[784])
+        >>>     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        >>> feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())
+        >>> # mnist.recordio will be generated in current directory
+        >>> fluid.recordio_writer.convert_reader_to_recordio_file(
+        >>>                     filename="mnist.recordio",
+        >>>                     reader_creator=paddle.batch(mnist.train(), batch_size=32),
+        >>>                     feeder=feeder)
+
+    Args:
+        filename(str): The recordio filename.
+        reader_creator(callable): The Python Reader Creator. See
+            :ref:`api_guide_python_reader`.
+        feeder(DataFeeder): The DataFeeder instance. Used to convert
+            :code:`reader_creator` to :code: `lod_tensor`
+        compressor: Must in fluid.core.RecordIOWriter.Compressor.Snappy or
+            fluid.core.RecordIOWriter.Compressor.NoCompress. Use :code:`Snappy`
+            by default.
+        max_num_records(int): Maximum number of records in one chuck. Each record
+            is each return value from reader function
+        feed_order(list): The order of variable names that the reader returns
+
+    Returns:
+        int: the number of record that saved.
+    """
    if feed_order is None:
        feed_order = feeder.feed_names
    counter = 0
@ -58,6 +97,17 @@ def convert_reader_to_recordio_files(
        compressor=core.RecordIOWriter.Compressor.Snappy,
        max_num_records=1000,
        feed_order=None):
+    """
+    convert a python reader to many recordio files.
+
+    This API is basically same as :code:`convert_reader_to_recordio_file`,
+    instead of it will create many recordio files. Each file contains at
+    most :code:`batch_per_file` records.
+
+    Please reference
+    :ref:`api_fluid_recordio_writer_convert_reader_to_recordio_file` for more
+    details.
+    """
    if feed_order is None:
        feed_order = feeder.feed_names
    f_name, f_ext = os.path.splitext(filename)
--- a/python/paddle/fluid/trainer.py
+++ b/python/paddle/fluid/trainer.py
@ -33,23 +33,59 @@ __all__ = [


 class BeginEpochEvent(object):
+    """
+    The begin of a training epoch.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+    """
+
    def __init__(self, epoch_id):
        self.epoch = epoch_id


 class EndEpochEvent(object):
+    """
+    The end of a training epoch.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+    """
+
    def __init__(self, epoch_id):
        self.epoch = epoch_id


 class BeginStepEvent(object):
+    """
+    The begin of a training epoch.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+        step_id(int): The current step ID.
+    """
+
    def __init__(self, epoch_id, step_id):
        self.epoch = epoch_id
        self.step = step_id
        self.fetch_metrics = True
+        """
+        If fetch_metrics is true, the metrics will be fetched at the 
+        EndStepEvent. Default is True.
+        """


 class EndStepEvent(object):
+    """
+    The end of a training step.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+        step_id(int): The current step ID.
+        metrics(list): A list of fetched tensor. The order of this list is same
+            as the :code:`train_func` returns.
+    """
+
    def __init__(self, epoch_id, step_id, metrics):
        self.epoch = epoch_id
        self.step = step_id
@ -57,6 +93,27 @@ class EndStepEvent(object):


 class CheckpointConfig(object):
+    """
+    Parameter object for :code:`fluid.io.save_checkpoint` and
+    :code:`fluid.Trainer`. Used to configuration how to save checkpoint.
+
+    Args:
+        checkpoint_dir(str): Directory path to save check point. Default is the
+            current directory.
+
+        max_num_checkpoints(int): The max number of local check points.
+        epoch_interval(int): Every number of epoch to save check point.
+        step_interval(int): Every number of step to save check point.
+
+    Examples:
+        >>> config = fluid.CheckpointConfig("./checkpoints")
+        >>> trainer = fluid.Trainer(train_func=train_program,
+        >>>                         place=place,
+        >>>                         optimizer_func=optimizer_func,
+        >>>                         checkpoint_config=config)
+        >>> trainer.train(...)
+    """
+
    def __init__(self,
                 checkpoint_dir=None,
                 max_num_checkpoints=3,
@ -113,11 +170,62 @@ def check_and_get_place(place):

 class Trainer(object):
    """
+    A trainer wraps MultiGPU/MultiNode training loops and can be used to train a
+    simple neural network easily.
+
+    This API takes a :code:`train_func`. A :code:`train_func` is a function that
+    return loss as it first return value. The reset value can be fetched by
+    EndStepEvent.metrics
+
+    This API also takes a :code:`optimizer_func` that will return an optimizer
+    instance.
+
+    For example, to train a MLP for MNIST dataset, the sample program is
+
+    >>> import paddle.fluid as fluid
+    >>>
+    >>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10):
+    >>>     hidden = image
+    >>>     for layer_size in layer_sizes:
+    >>>         hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation)
+    >>>     return fluid.layers.fc(input=hidden, size=num_classes, act="softmax")
+    >>>
+    >>> def train_mnist_mlp():
+    >>>     img = fluid.layers.data(name='image', shape=[784])
+    >>>     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    >>>     prediction = mlp(img)
+    >>>     return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label))
+    >>>
+    >>> def optimizer():
+    >>>     return fluid.optimizer.Adam()
+    >>>
+    >>> trainer = Trainer(train_func=train_mnist_mlp,
+    >>>                   optimizer_func=optimizer,
+    >>>                   place=fluid.CUDAPlace(0),
+    >>>                   parallel=True)
+    >>>
+    >>> def train_callback(event):
+    >>>     if isinstance(event, fluid.EndStepEvent):
+    >>>         print "Epoch ID", event.epoch, "Step ID",\
+    >>>             event.step, "AvgLoss", event.metrics[0]
+    >>>     elif isinstance(event, fluid.EndEpochEvent):
+    >>>         trainer.save_params("./model_{0}".format(event.epoch))
+    >>>
+    >>> trainer.train(num_epochs=100, event_handler=train_callback)
+
+    For more example, please see :ref:`api_guide_high_level_api`.
+

    Args:
-        train_func(callable): A function which will return loss. The loss must be a scalar.
+        train_func(callable): A function which will return loss. The loss must be
+            a scalar tensor.
        optimizer_func(callable): A function that returns an Optimizer object.
-        place: The device place of this trainer.
+        place(CUDAPlace|CPUPlace): The device place of this trainer. If
+            :code:`parallel=True,` all CUDA Places will be used if :code:`place`
+            is a :code:`CUDAPlace`.
+        parallel(bool): True if use multiple devices.
+        checkpoint_config(CheckpointConfig): Configuration about how to save
+            checkpoints.
    """

    def __init__(self,
@ -129,9 +237,6 @@ class Trainer(object):
                 checkpoint_config=None):
        self.__stop = False
        self.parallel = parallel
-        # 1. we need to generate a framework.Program by calling
-        # program_func. Reference: fluid.program_guard in
-        # test_word2vec.py

        # config for checkpoint
        # only chief worker will save variables
@ -145,6 +250,10 @@ class Trainer(object):

        self.scope = core.Scope()

+        # 1. we need to generate a framework.Program by calling
+        # program_func. Reference: fluid.program_guard in
+        # test_word2vec.py
+
        self.startup_program = framework.Program()
        self.train_program = framework.Program()

@ -277,17 +386,18 @@ class Trainer(object):

    def train(self, num_epochs, event_handler, reader=None, feed_order=None):
        """
-        Train the model.
+        Start the train loop to train the model.

        Args:
-            num_epochs: The number of epoch. An epoch will process all data in reader
-            event_handler: The event handler. A function with type (ev:Event)->void
-            reader:
-            feed_order: Feeding order of reader. None will following the defining
+            num_epochs(int): The number of epoch. An epoch will process all data in reader
+            event_handler(callable): The event handler. A function with type (ev:Event)->void
+            reader(callable): A reader creator object. See also
+                :ref:`api_guide_python_reader` .
+            feed_order(list): Feeding order of reader. None will following the defining
                order in program

        Returns:
-
+            None
        """
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "")
        if training_role == "PSERVER":
@ -307,16 +417,24 @@ class Trainer(object):
        Test the model on given test data

        Args:
-            reader: The reader that yields test data.
-            feed_order: Feeding order of reader. None will following the defining
-                order in program
+            reader(callable): The reader that yields test data.
+            feed_order(list): Feeding order of reader. None will following the
+                defining order in program
        """

        return self._test_by_executor(reader, feed_order,
                                      self.train_func_outputs)

    def save_params(self, param_path):
-        # reference: save_persistables in io.py
+        """
+        Save all parameters into :code:`param_path`.
+
+        Args:
+            param_path(str): The path to save parameters.
+
+        Returns:
+            None
+        """
        with self._prog_and_scope_guard():
            exe = executor.Executor(self.place)
            io.save_persistables(exe, dirname=param_path)
--- a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
+++ b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
@ -383,6 +383,16 @@ def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):


 def release_memory(input_program, skip_opt_set=None):
+    """
+    Modify the input program and insert :code:`delete_op` to early drop not used
+    variables. The modification will be performed inplace.
+
+    Notes: This is an experimental API and could be removed in next few
+    releases. Users should not use this API.
+
+    Args:
+        input_program(Program): The program will be inserted :code:`delete_op`.
+    """
    cfgs = _get_cfgs(input_program)
    for cfg in cfgs:
        cfg.release_memory(skip_opt_set=skip_opt_set)
--- a/python/paddle/fluid/unique_name.py
+++ b/python/paddle/fluid/unique_name.py
@ -16,7 +16,7 @@ import collections
 import contextlib
 import sys

-__all__ = ['generate', 'switch', 'guard', 'UniqueNameGenerator']
+__all__ = ['generate', 'switch', 'guard']


 class UniqueNameGenerator(object):