Merge pull request #11629 from reyoung/hotfix/more_api_reference_docs

Cherry Pick the documentation PRs.
7 years ago · 96f1edf70e
parent 49080ac9ae 35eb0112ba
commit 96f1edf70e
5 changed files with 480 additions and 117 deletions
--- a/python/paddle/fluid/evaluator.py
+++ b/python/paddle/fluid/evaluator.py
@ -41,7 +41,12 @@ def _clone_var_(block, var):

 class Evaluator(object):
    """
-    Base Class for all evaluators
+    Warning: prefer fluid.metrics.*, which offers more flexible
+    support through pure Python and Operators and is decoupled
+    from the executor. This short note is intended to encourage new
+    users to start from Metrics.
+
+    Base Class for all evaluators.

    Args:
        name(str): The name of evaluator. such as, "accuracy". Used for generate
@ -69,6 +74,10 @@ class Evaluator(object):
    def reset(self, executor, reset_program=None):
        """
        reset metric states at the begin of each pass/user specified batch
+
+        Args:
+            executor(Executor|ParallelExecutor): an executor for executing the reset_program
+            reset_program(Program): a single Program for the reset process
        """
        if reset_program is None:
            reset_program = Program()
@ -85,15 +94,16 @@ class Evaluator(object):
    def eval(self, executor, eval_program=None):
        """
        Evaluate the statistics merged by multiple mini-batches.
+        Args:
+            executor(Executor|ParallelExecutor): an executor for executing the eval_program
+            eval_program(Program): a single Program for the eval process
        """
        raise NotImplementedError()

-    def create_state(self, suffix, dtype, shape):
+    def _create_state(self, suffix, dtype, shape):
        """
        Create state variable.

-        NOTE: It is not a public API.
-
        Args:
            suffix(str): the state suffix.
            dtype(str|core.VarDesc.VarType): the state data type
@ -113,9 +123,35 @@ class Evaluator(object):

 class ChunkEvaluator(Evaluator):
    """
+    Warning: This would be deprecated in the future. Please use fluid.metrics.ChunkEvaluator 
+    instead.
+
    Accumulate counter numbers output by chunk_eval from mini-batches and
    compute the precision recall and F1-score using the accumulated counter
    numbers.
+    For some basics of chunking, please refer to
+    'Chunking with Support Vector Machines <https://aclanthology.info/pdf/N/N01/N01-1025.pdf>'.
+
+    Args:
+        input (Variable): prediction output of the network.
+        label (Variable): label of the test data set.
+        chunk_scheme (str): can be IOB/IOE/IOBES and IO. See the chunk_eval op for details.
+        num_chunk_types (int): the number of chunk type.
+        excluded_chunk_types (list): A list including chunk type ids, indicating chunk types that are not counted.
+
+    Returns:
+        tuple: tuple containing: precision, recall, f1_score
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.executor(place)
+            evaluator = fluid.Evaluator.ChunkEvaluator(input, label)
+            for epoch in PASS_NUM:
+                evaluator.reset(exe)
+                for data in batches:
+                    loss = exe.run(fetch_list=[cost])
+                precision, recall, f1_score = evaluator.eval(exe)
    """

    def __init__(
@ -130,11 +166,11 @@ class ChunkEvaluator(Evaluator):
        if main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

-        self.num_infer_chunks = self.create_state(
+        self.num_infer_chunks = self._create_state(
            dtype='int64', shape=[1], suffix='num_infer_chunks')
-        self.num_label_chunks = self.create_state(
+        self.num_label_chunks = self._create_state(
            dtype='int64', shape=[1], suffix='num_label_chunks')
-        self.num_correct_chunks = self.create_state(
+        self.num_correct_chunks = self._create_state(
            dtype='int64', shape=[1], suffix='num_correct_chunks')
        precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks = layers.chunk_eval(
            input=input,
@ -178,6 +214,8 @@ class ChunkEvaluator(Evaluator):

 class EditDistance(Evaluator):
    """
+    Warning: This would be deprecated in the future. Please use fluid.metrics.EditDistance
+    instead.
    Accumulate edit distance sum and sequence number from mini-batches and
    compute the average edit_distance and instance error of all batches.

@ -188,7 +226,8 @@ class EditDistance(Evaluator):
        ignored_tokens(list of int): Tokens that should be removed before
        calculating edit distance.

-    Example:
+    Examples:
+        .. code-block:: python

            exe = fluid.executor(place)
            distance_evaluator = fluid.Evaluator.EditDistance(input, label)
@ -210,11 +249,11 @@ class EditDistance(Evaluator):
        if main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

-        self.total_distance = self.create_state(
+        self.total_distance = self._create_state(
            dtype='float32', shape=[1], suffix='total_distance')
-        self.seq_num = self.create_state(
+        self.seq_num = self._create_state(
            dtype='int64', shape=[1], suffix='seq_num')
-        self.instance_error = self.create_state(
+        self.instance_error = self._create_state(
            dtype='int64', shape=[1], suffix='instance_error')
        distances, seq_num = layers.edit_distance(
            input=input, label=label, ignored_tokens=ignored_tokens)
@ -256,9 +295,10 @@ class EditDistance(Evaluator):

 class DetectionMAP(Evaluator):
    """
+    Warning: This would be deprecated in the future. Please use fluid.metrics.DetectionMAP
+    instead.
    Calculate the detection mean average precision (mAP).

-    TODO (Dang Qingqing): update the following doc.
    The general steps are as follows:
    1. calculate the true positive and false positive according to the input
        of detection and labels.
@ -293,7 +333,8 @@ class DetectionMAP(Evaluator):
            - 11point: the 11-point interpolated average precision.
            - integral: the natural integral of the precision-recall curve.

-    Example:
+    Examples:
+        .. code-block:: python

            exe = fluid.executor(place)
            map_evaluator = fluid.Evaluator.DetectionMAP(input,
@ -340,9 +381,10 @@ class DetectionMAP(Evaluator):
            evaluate_difficult=evaluate_difficult,
            ap_version=ap_version)

-        self.create_state(dtype='int32', shape=None, suffix='accum_pos_count')
-        self.create_state(dtype='float32', shape=None, suffix='accum_true_pos')
-        self.create_state(dtype='float32', shape=None, suffix='accum_false_pos')
+        self._create_state(dtype='int32', shape=None, suffix='accum_pos_count')
+        self._create_state(dtype='float32', shape=None, suffix='accum_true_pos')
+        self._create_state(
+            dtype='float32', shape=None, suffix='accum_false_pos')

        self.has_state = None
        var = self.helper.create_variable(
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@ -18,7 +18,7 @@ from framework import Program, default_main_program, Variable
 from . import core

 __all__ = [
-    'Executor', 'global_scope', 'scope_guard', 'switch_scope', 'fetch_var'
+    'Executor', 'global_scope', 'scope_guard', '_switch_scope', 'fetch_var'
 ]

 g_scope = core.Scope()
@ -35,7 +35,7 @@ def global_scope():
    return g_scope


-def switch_scope(scope):
+def _switch_scope(scope):
    global g_scope
    ex = g_scope
    g_scope = scope
@ -57,12 +57,27 @@ def scope_guard(scope):
    Args:
        scope: The new global/default scope.
    """
-    ex = switch_scope(scope)
+    ex = _switch_scope(scope)
    yield
-    switch_scope(ex)
+    _switch_scope(ex)


 def as_numpy(tensor):
+    """
+    Convert a Tensor to a numpy.ndarray. It only supports Tensors without LoD information.
+    For higher dimensional sequence data, please use LoDTensor directly.
+    Examples:
+        >>> import paddle.fluid as fluid
+        >>> outs = executor.run(...)
+        >>> np_outs = map(lambda x: as_numpy(x), outs)
+        >>>     ...
+
+    Args:
+       tensor(Variable): an instance of Tensor
+
+    Returns:
+        numpy.ndarray
+    """
    if isinstance(tensor, list):
        return [as_numpy(t) for t in tensor]
    assert isinstance(tensor, core.LoDTensor)
@ -186,7 +201,7 @@ def fetch_var(name, scope=None, return_numpy=True):
    return tensor


-def get_program_cache_key(feed, fetch_list):
+def _get_program_cache_key(feed, fetch_list):
    feed_var_names = feed.keys()

    def to_name_str(var):
@ -205,6 +220,25 @@ def get_program_cache_key(feed, fetch_list):


 class Executor(object):
+    """
+    An Executor in Python. It only supports running on a single GPU. For multiple cards, please refer to
+    ParallelExecutor.
+    The Python executor takes a program and adds feed operators and fetch operators to this program according
+    to the feed map and fetch_list. The feed map provides input data for the program. fetch_list provides
+    the variables (or names) that the user wants to get after the program runs. Note: the executor will run all
+    operators in the program, not only the operators that the fetch_list depends on.
+    It stores the global variables in the global scope, and creates a local scope for the temporary
+    variables. The local scope contents will be discarded after every minibatch forward/backward pass finishes.
+    But the global scope variables will persist across different runs.
+    All ops in the program will be run in sequence.
+
+    Args:
+        place(core.CPUPlace|core.CUDAPlace(n)): indicate the executor run on which device
+
+    Note: For debugging a complicated network on parallel GPUs, you can test it on the executor.
+    They have exactly the same arguments, and are expected to produce the same results.
+    """
+
    def __init__(self, place):
        self.place = place
        p = core.Place()
@ -213,6 +247,23 @@ class Executor(object):
        self.program_caches = dict()

    def as_lodtensor(self, data):
+        """
+        Convert a numpy.ndarray to a Tensor. It only supports Tensors without LoD information.
+        For higher dimensional sequence data, please use LoDTensor directly.
+
+        Examples:
+            >>> import paddle.fluid as fluid
+            >>> exe = fluid.Executor(fluid.CPUPlace())
+            >>> data = np.random.random(size=(100, 200, 300))
+            >>> np_outs = map(lambda x: exe.as_lodtensor(x), data)
+            >>>     ...
+
+        Args:
+            data(numpy.ndarray): an instance of array
+
+        Returns:
+            LoDTensor
+        """
        if isinstance(data, list):
            raise RuntimeError("Some of your feed data hold LoD information. \
                They can not be completely cast from a list of Python \
@ -304,23 +355,47 @@ class Executor(object):
            scope=None,
            return_numpy=True,
            use_program_cache=False):
-        """ Run program by this Executor. Feed data by feed map, fetch result by fetch_list.
-
+        """
+        Run program by this Executor. Feed data by feed map, fetch result by fetch_list.
        Python executor takes a program, add feed operators and fetch operators to this program according
        to feed map and fetch_list. Feed map provides input data for the program. fetch_list provides
-        the variables(or names) that user want to get after program run. Note: the executor will run all
+        the variables(or names) that user want to get after program run.
+
+        Note: the executor will run all
        operators in the program but not only the operators dependent by the fetch_list

-        :param program: the program that need to run, if not provied, then default_main_program will be used.
-        :param feed: feed variable map, e.g. {"image": ImageData, "label": LableData}
-        :param fetch_list: a list of variable or variable names that user want to get, run will return them according
-        to this list.
-        :param feed_var_name: the name for the input variable of feed Operator.
-        :param fetch_var_name: the name for the output variable of feed Operator.
-        :param scope: the scope used to run this program, you can switch it to different scope. default is global_scope
-        :param return_numpy: if convert the fetched tensor to numpy
-        :param use_program_cache: set use_program_cache to true if program not changed compare to the last step.
-        :return: result according to fetch_list.
+        Args:
+            program(Program): the program that needs to run; if not provided, default_main_program will be used.
+            feed(dict): feed variable map, e.g. {"image": ImageData, "label": LabelData}
+            fetch_list(list): a list of variables or variable names that the user wants to get; run will return them according to this list.
+            feed_var_name(str): the name for the input variable of the feed Operator.
+            fetch_var_name(str): the name for the output variable of the fetch Operator.
+            scope(Scope): the scope used to run this program; you can switch it to a different scope. Default is global_scope.
+            return_numpy(bool): whether to convert the fetched tensors to numpy
+            use_program_cache(bool): set use_program_cache to true if the program has not changed compared to the last step.
+
+        Returns:
+
+            list(numpy.array): fetch result according to fetch_list.
+
+
+        Examples:
+
+            >>> data = layers.data(name='X', shape=[1], dtype='float32')
+            >>> hidden = layers.fc(input=data, size=10)
+            >>> layers.assign(hidden, out)
+            >>> loss = layers.mean(out)
+            >>> adam = fluid.optimizer.Adam()
+            >>> adam.minimize(loss)
+
+            >>> cpu = core.CPUPlace()
+            >>> exe = Executor(cpu)
+            >>> exe.run(default_startup_program())
+
+            >>> x = numpy.random.random(size=(10, 1)).astype('float32')
+            >>> outs = exe.run(
+            >>>     feed={'X': x},
+            >>>     fetch_list=[loss.name])
        """
        if feed is None:
            feed = {}
@ -341,7 +416,7 @@ class Executor(object):
        if scope is None:
            scope = global_scope()

-        cache_key = get_program_cache_key(feed, fetch_list)
+        cache_key = _get_program_cache_key(feed, fetch_list)
        if use_program_cache:
            cached_program = self._get_program_cache(cache_key)
            if cached_program is None:
--- a/python/paddle/fluid/layers/init.py
+++ b/python/paddle/fluid/layers/init.py
@ -28,8 +28,8 @@ import math_op_patch
 from math_op_patch import *
 import detection
 from detection import *
-import metric
-from metric import *
+import metric_op
+from metric_op import *
 from learning_rate_scheduler import *

 __all__ = []
@ -41,5 +41,5 @@ __all__ += control_flow.__all__
 __all__ += ops.__all__
 __all__ += device.__all__
 __all__ += detection.__all__
-__all__ += metric.__all__
+__all__ += metric_op.__all__
 __all__ += learning_rate_scheduler.__all__
--- a/python/paddle/fluid/layers/metric_op.py
+++ b/python/paddle/fluid/layers/metric_op.py
@ -126,7 +126,7 @@ def auc(input, label, curve='ROC', num_thresholds=200):
    topk_out, topk_indices = nn.topk(input, k=k)
    auc_out = helper.create_tmp_variable(dtype="float32")
    helper.append_op(
-        type="accuracy",
+        type="auc",
        inputs={
            "Out": [topk_out],
            "Indices": [topk_indices],
--- a/python/paddle/fluid/metrics.py
+++ b/python/paddle/fluid/metrics.py