!12291 fix some wrong descriptions of the API docs

From: @tiancixiao Reviewed-by: @pandoublefeng,@liucunwei Signed-off-by: @pandoublefeng,@liucunwei
4 years ago · 2e9a52fc5c
parent 96cea98864 3e85e124d7
commit 2e9a52fc5c
4 changed files with 74 additions and 67 deletions
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@ -1732,10 +1732,7 @@ class MappableDataset(SourceDataset):
            new_sampler (Sampler): The sampler to use for the current dataset.

        Examples:
-            >>> # Note: A SequentialSampler is created by default
-            >>> dataset = ds.ImageFolderDataset(image_folder_dataset_dir)
-            >>>
-            >>> # Use a DistributedSampler instead of the SequentialSampler
+            >>> # use a DistributedSampler instead
            >>> new_sampler = ds.DistributedSampler(10, 2)
            >>> dataset.use_sampler(new_sampler)
        """
@ -2888,15 +2885,15 @@ class MnistDataset(MappableDataset):

    The generated dataset has two columns ['image', 'label'].
    The type of the image tensor is uint8. The label is a scalar uint32 tensor.
-    This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. The table
+    This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. The table
    below shows what input arguments are allowed and their expected behavior.

    .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
       :widths: 25 25 50
       :header-rows: 1

-       * - Parameter 'sampler'
-         - Parameter 'shuffle'
+       * - Parameter `sampler`
+         - Parameter `shuffle`
         - Expected Order Behavior
       * - None
         - None
@ -2937,19 +2934,19 @@ class MnistDataset(MappableDataset):
        dataset_dir (str): Path to the root directory that contains the dataset.
        usage (str, optional): Usage of this dataset, can be "train", "test" or "all" . "train" will read from 60,000
            train samples, "test" will read from 10,000 test samples, "all" will read from all 70,000 samples.
-            (default=None, all samples)
+            (default=None, will read all samples)
        num_samples (int, optional): The number of images to be included in the dataset
-            (default=None, all images).
+            (default=None, will read all images).
        num_parallel_workers (int, optional): Number of workers to read the data
-            (default=None, set in the config).
+            (default=None, will use value set in the config).
        shuffle (bool, optional): Whether or not to perform shuffle on the dataset
            (default=None, expected order behavior shown in the table).
        sampler (Sampler, optional): Object used to choose samples from the
            dataset (default=None, expected order behavior shown in the table).
        num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
-            When this argument is specified, 'num_samples' reflects the max sample number of per shard.
-        shard_id (int, optional): The shard ID within num_shards (default=None). This
-            argument can only be specified when num_shards is also specified.
+            When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
+        shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+            argument can only be specified when `num_shards` is also specified.
        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
            (default=None, which means no cache is used).

@ -3587,15 +3584,15 @@ class ManifestDataset(MappableDataset):
    The shape of the image column is [image_size] if decode flag is False, or [H,W,C]
    otherwise.
    The type of the image tensor is uint8. The label is a scalar uint64 tensor.
-    This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. The table
+    This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. The table
    below shows what input arguments are allowed and their expected behavior.

-    .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
+    .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
       :widths: 25 25 50
       :header-rows: 1

-       * - Parameter 'sampler'
-         - Parameter 'shuffle'
+       * - Parameter `sampler`
+         - Parameter `shuffle`
         - Expected Order Behavior
       * - None
         - None
@ -3618,11 +3615,11 @@ class ManifestDataset(MappableDataset):

    Args:
        dataset_file (str): File to be read.
-        usage (str, optional): acceptable usages include train, eval and inference (default="train").
+        usage (str, optional): Acceptable usages include "train", "eval" and "inference" (default="train").
        num_samples (int, optional): The number of images to be included in the dataset.
-            (default=None, all images).
+            (default=None, will include all images).
        num_parallel_workers (int, optional): Number of workers to read the data
-            (default=None, number set in the config).
+            (default=None, will use value set in the config).
        shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
            order behavior shown in the table).
        sampler (Sampler, optional): Object used to choose samples from the
@ -3632,10 +3629,10 @@ class ManifestDataset(MappableDataset):
            class will be given a unique index starting from 0).
        decode (bool, optional): decode the images after reading (default=False).
        num_shards (int, optional): Number of shards that the dataset will be divided
-            into (default=None). When this argument is specified, 'num_samples' reflects
+            into (default=None). When this argument is specified, `num_samples` reflects
            the maximum number of samples per shard.
-        shard_id (int, optional): The shard ID within num_shards (default=None). This
-            argument can only be specified when num_shards is also specified.
+        shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+            argument can only be specified when `num_shards` is also specified.
        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
            (default=None, which means no cache is used).

@ -4195,7 +4192,8 @@ class CocoDataset(MappableDataset):
    """
    A source dataset for reading and parsing COCO dataset.

-    CocoDataset support four kinds of task: 2017 Train/Val/Test Detection, Keypoints, Stuff, Panoptic.
+    `CocoDataset` supports four kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation and
+    Panoptic Segmentation of 2017 Train/Val/Test dataset.

    The generated dataset has multi-columns :

@ -4339,11 +4337,12 @@ class CocoDataset(MappableDataset):

 class CelebADataset(MappableDataset):
    """
-    A source dataset for reading and parsing CelebA dataset. Currently supported: list_attr_celeba.txt only.
+    A source dataset for reading and parsing CelebA dataset. Currently only `list_attr_celeba.txt` is supported,
+    which contains the attribute annotations of the dataset.

    Note:
        The generated dataset has two columns ['image', 'attr'].
-        The type of the image tensor is uint8. The attribute tensor is uint32 and one hot type.
+        The image tensor is of the uint8 type. The attribute tensor is of the uint32 type and one hot encoded.

    Citation of CelebA dataset.

@ -4376,20 +4375,20 @@ class CelebADataset(MappableDataset):

    Args:
        dataset_dir (str): Path to the root directory that contains the dataset.
-        num_parallel_workers (int, optional): Number of workers to read the data (default=value set in the config).
+        num_parallel_workers (int, optional): Number of workers to read the data (default=None, will use value set in
+            the config).
        shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None).
-        usage (str): one of 'all', 'train', 'valid' or 'test'.
+        usage (str, optional): One of 'all', 'train', 'valid' or 'test' (default='all', will read all samples).
        sampler (Sampler, optional): Object used to choose samples from the dataset (default=None).
        decode (bool, optional): decode the images after reading (default=False).
-        extensions (list[str], optional): List of file extensions to be
-            included in the dataset (default=None).
-        num_samples (int, optional): The number of images to be included in the dataset.
-            (default=None, all images).
+        extensions (list[str], optional): List of file extensions to be included in the dataset (default=None).
+        num_samples (int, optional): The number of images to be included in the dataset
+            (default=None, will include all images).
        num_shards (int, optional): Number of shards that the dataset will be divided
-            into (default=None). When this argument is specified, 'num_samples' reflects
+            into (default=None). When this argument is specified, `num_samples` reflects
            the maximum number of samples per shard.
-        shard_id (int, optional): The shard ID within num_shards (default=None). This
-            argument can only be specified when num_shards is also specified.
+        shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+            argument can only be specified when `num_shards` is also specified.
        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
            (default=None, which means no cache is used).

--- a/mindspore/dataset/text/transforms.py
+++ b/mindspore/dataset/text/transforms.py
@ -63,6 +63,7 @@ class TextTensorOperation(TensorOperation):
    """
    Base class of Text Tensor Ops
    """
+
    def __call__(self, input_tensor):
        if not isinstance(input_tensor, list):
            input_list = [input_tensor]
@ -95,13 +96,11 @@ DE_C_INTER_JIEBA_MODE = {
    JiebaMode.HMM: cde.JiebaMode.DE_JIEBA_HMM
 }

-
 DE_C_INTER_SENTENCEPIECE_LOADTYPE = {
    SPieceTokenizerLoadType.FILE: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KFILE,
    SPieceTokenizerLoadType.MODEL: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KMODEL
 }

-
 DE_C_INTER_SENTENCEPIECE_OUTTYPE = {
    SPieceTokenizerOutType.STRING: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KString,
    SPieceTokenizerOutType.INT: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KINT
@ -282,7 +281,7 @@ class Lookup(TextTensorOperation):
        vocab (Vocab): A vocabulary object.
        unknown_token (str, optional): Word used for lookup if the word being looked up is out-of-vocabulary (OOV).
            If unknown_token is OOV, a runtime error will be thrown (default=None).
-        data_type (mindspore.dtype, optional): mindspore.dtype that lookup maps string to (default=mstype.int32)
+        data_type (mindspore.dtype, optional): mindspore.dtype that lookup maps string to (default=mindspore.int32)

    Examples:
        >>> # Load vocabulary from list
@ -309,18 +308,19 @@ class Ngram(TextTensorOperation):
    Refer to https://en.wikipedia.org/wiki/N-gram#Examples for an overview of what n-gram is and how it works.

    Args:
-        n (list[int]):  n in n-gram, n >= 1. n is a list of positive integers. For example, if n=[4,3], then the result
+        n (list[int]): n in n-gram, which is a list of positive integers. For example, if n=[4, 3], then the result
            would be a 4-gram followed by a 3-gram in the same tensor. If the number of words is not enough to make up
-            for a n-gram, an empty string will be returned. For example, 3 grams on ["mindspore","best"] will result in
+            an n-gram, an empty string will be returned. For example, a 3-gram on ["mindspore", "best"] will result in
            an empty string produced.
-        left_pad (tuple, optional): ("pad_token", pad_width). Padding performed on left side of the sequence. pad_width
-            will be capped at n-1. left_pad=("_",2) would pad left side of the sequence with "__" (default=None).
-        right_pad (tuple, optional): ("pad_token", pad_width). Padding performed on right side of the sequence.
-            pad_width will be capped at n-1. right_pad=("-":2) would pad right side of the sequence with "--"
-            (default=None).
-        separator (str, optional): symbol used to join strings together. For example. if 2-gram is
+        left_pad (tuple, optional): Padding performed on left side of the sequence shaped like ("pad_token", pad_width).
+            `pad_width` will be capped at n-1. For example, specifying left_pad=("_", 2) would pad left side of the
+            sequence with "__" (default=None).
+        right_pad (tuple, optional): Padding performed on right side of the sequence shaped like
+            ("pad_token", pad_width). `pad_width` will be capped at n-1. For example, specifying right_pad=("-", 2)
+            would pad right side of the sequence with "--" (default=None).
+        separator (str, optional): Symbol used to join strings together. For example, if 2-gram is
            ["mindspore", "amazing"] with separator="-", the result would be ["mindspore-amazing"]
-            (default=None, which means whitespace is used).
+            (default=None, which will use whitespace as separator).

    Examples:
        >>> text_file_dataset = text_file_dataset.map(operations=text.Ngram(3, separator=""))
@ -389,6 +389,7 @@ class SlidingWindow(TextTensorOperation):
        >>> # |   [3,4,5]]  |
        >>> # +--------------+
    """
+
    @check_slidingwindow
    def __init__(self, width, axis=0):
        self.width = width
@ -557,6 +558,7 @@ class PythonTokenizer:
        tokens = self.tokenizer(in_array)
        return tokens

+
 if platform.system().lower() != 'windows':
    DE_C_INTER_NORMALIZE_FORM = {
        NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE,
@ -575,12 +577,12 @@ if platform.system().lower() != 'windows':
            BasicTokenizer is not supported on Windows platform yet.

        Args:
-            lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation
+            lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8 with `NFD` mode, RegexReplace operation
                on input text to fold the text to lower case and strip accents characters. If False, only apply
-                NormalizeUTF8('normalization_form' mode) operation on input text (default=False).
-            keep_whitespace (bool, optional): If True, the whitespace will be kept in out tokens (default=False).
+                NormalizeUTF8 operation with the specified mode on input text (default=False).
+            keep_whitespace (bool, optional): If True, the whitespace will be kept in output tokens (default=False).
            normalization_form (NormalizeForm, optional): Used to specify a specific normalize mode. This is
-                only effective when 'lower_case' is False. See NormalizeUTF8 for details (default=NormalizeForm.NONE).
+                only effective when `lower_case` is False. See NormalizeUTF8 for details (default=NormalizeForm.NONE).
            preserve_unused_token (bool, optional): If True, do not split special tokens like
                '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]' (default=True).
            with_offsets (bool, optional): Whether or not to output offsets of tokens (default=False).
@ -637,14 +639,14 @@ if platform.system().lower() != 'windows':
            vocab (Vocab): A vocabulary object.
            suffix_indicator (str, optional): Used to show that the subword is the last part of a word (default='##').
            max_bytes_per_token (int, optional): Tokens exceeding this length will not be further split (default=100).
-            unknown_token (str, optional): When a token cannot be found: if 'unknown_token' is empty string,
-                return the token directly, else return 'unknown_token'(default='[UNK]').
-            lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation
+            unknown_token (str, optional): When an unknown token is found, return the token directly if `unknown_token`
+                is an empty string, else return `unknown_token` instead (default='[UNK]').
+            lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8 with `NFD` mode, RegexReplace operation
                on input text to fold the text to lower case and strip accented characters. If False, only apply
-                NormalizeUTF8('normalization_form' mode) operation on input text (default=False).
+                NormalizeUTF8 operation with the specified mode on input text (default=False).
            keep_whitespace (bool, optional): If True, the whitespace will be kept in output tokens (default=False).
            normalization_form (NormalizeForm, optional): Used to specify a specific normalize mode,
-                only effective when 'lower_case' is False. See NormalizeUTF8 for details (default='NONE').
+                only effective when `lower_case` is False. See NormalizeUTF8 for details (default=NormalizeForm.NONE).
            preserve_unused_token (bool, optional): If True, do not split special tokens like
                '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]' (default=True).
            with_offsets (bool, optional): Whether or not to output offsets of tokens (default=False).
@ -703,7 +705,8 @@ if platform.system().lower() != 'windows':

    class CaseFold(TextTensorOperation):
        """
-        Apply case fold operation on UTF-8 string tensor.
+        Apply case fold operation on UTF-8 string tensor. Case folding is more aggressive than a plain lower-case
+        conversion and can convert more characters into lower case.

        Note:
            CaseFold is not supported on Windows platform yet.
--- a/mindspore/dataset/transforms/c_transforms.py
+++ b/mindspore/dataset/transforms/c_transforms.py
@ -59,23 +59,24 @@ class OneHot(cde.OneHotOp):

 class Fill(cde.FillOp):
    """
-    Tensor operation to create a tensor filled with input scalar value.
+    Tensor operation to fill all elements in the tensor with the specified value.
    The output tensor will have the same shape and type as the input tensor.

    Args:
        fill_value (Union[str, bytes, int, float, bool]): scalar value
-            to fill created tensor with.
+            to fill the tensor with.

    Examples:
        >>> import numpy as np
-        >>> from mindspore.dataset import GeneratorDataset
-        >>> # Generate 1d int numpy array from 0 - 63
+        >>> # generate a 1D integer numpy array from 0 to 4
        >>> def generator_1d():
-        >>>     for i in range(64):
+        ...     for i in range(5):
        ...         yield (np.array([i]),)
-        >>> generator_dataset = GeneratorDataset(generator_1d,column_names='col')
+        >>> generator_dataset = ds.GeneratorDataset(generator_1d, column_names="col1")
+        >>> # [[0], [1], [2], [3], [4]]
        >>> fill_op = c_transforms.Fill(3)
        >>> generator_dataset = generator_dataset.map(operations=fill_op)
+        >>> # [[3], [3], [3], [3], [3]]
    """

    @check_fill_value
@ -351,6 +352,8 @@ class Unique(cde.UniqueOp):
        >>> # +---------+-----------------+---------+

    """
+
+
 class Compose():
    """
    Compose a list of transforms into a single transform.
@ -376,6 +379,7 @@ class Compose():
                operations.append(op)
        return cde.ComposeOperation(operations)

+
 class RandomApply():
    """
    Randomly perform a series of transforms with a given probability.
--- a/mindspore/dataset/vision/c_transforms.py
+++ b/mindspore/dataset/vision/c_transforms.py
@ -62,6 +62,7 @@ class ImageTensorOperation(TensorOperation):
    """
    Base class of Image Tensor Ops
    """
+
    def __call__(self, input_tensor):
        if not isinstance(input_tensor, list):
            input_list = [input_tensor]
@ -93,11 +94,9 @@ DE_C_BORDER_TYPE = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
                    Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
                    Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC}

-
 DE_C_IMAGE_BATCH_FORMAT = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC,
                           ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW}

-
 DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR,
                   Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR,
                   Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC,
@ -307,6 +306,7 @@ class Equalize(ImageTensorOperation):
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """
+
    def parse(self):
        return cde.EqualizeOperation()

@ -337,6 +337,7 @@ class Invert(ImageTensorOperation):
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """
+
    def parse(self):
        return cde.InvertOperation()

@ -729,7 +730,7 @@ class RandomCrop(ImageTensorOperation):

 class RandomCropDecodeResize(ImageTensorOperation):
    """
-    Equivalent to RandomResizedCrop, but crops before decodes.
+    A combination of `Crop`, `Decode` and `Resize`. It will get better performance for JPEG images.

    Args:
        size (Union[int, sequence]): The size of the output image.
@ -813,7 +814,7 @@ class RandomCropWithBBox(ImageTensorOperation):

    Examples:
        >>> decode_op = c_vision.Decode()
-        >>> random_crop_with_bbox_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
+        >>> random_crop_with_bbox_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200])
        >>> transforms_list = [decode_op, random_crop_with_bbox_op]
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])