diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index f2c1642df5..ae0dc6789e 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -4421,23 +4421,7 @@ class CelebADataset(MappableDataset): The generated dataset has two columns ['image', 'attr']. The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - num_parallel_workers (int, optional): Number of workers to read the data (default=value set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None). - dataset_type (str): one of 'all', 'train', 'valid' or 'test'. - sampler (Sampler, optional): Object used to choose samples from the dataset (default=None). - decode (bool, optional): decode the images after reading (default=False). - extensions (list[str], optional): List of file extensions to be - included in the dataset (default=None). - num_samples (int, optional): The number of images to be included in the dataset. - (default=None, all images). - num_shards (int, optional): Number of shards that the dataset should be divided - into (default=None). - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument should be specified only when num_shards is also specified. - - Citation of CelebA dataset. + Citation of CelebA dataset. .. code-block:: @@ -4455,9 +4439,9 @@ class CelebADataset(MappableDataset): bibsource = {dblp computer science bibliography, https://dblp.org}, howpublished = {http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html}, description = {CelebFaces Attributes Dataset (CelebA) is a large-scale face attributes dataset - with more than 200K celebrity images, each with 40 attribute annotations. The - images in this dataset cover large pose variations and background clutter. 
CelebA - has large diversities, large quantities, and rich annotations, including + with more than 200K celebrity images, each with 40 attribute annotations. + The images in this dataset cover large pose variations and background clutter. + CelebA has large diversities, large quantities, and rich annotations, including * 10,177 number of identities, * 202,599 number of face images, and * 5 landmark locations, 40 binary attributes annotations per image. @@ -4465,6 +4449,22 @@ class CelebADataset(MappableDataset): vision tasks: face attribute recognition, face detection, landmark (or facial part) localization, and face editing & synthesis.} } + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + num_parallel_workers (int, optional): Number of workers to read the data (default=value set in the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None). + dataset_type (str): One of 'all', 'train', 'valid' or 'test'. + sampler (Sampler, optional): Object used to choose samples from the dataset (default=None). + decode (bool, optional): Whether to decode the images after reading (default=False). + extensions (list[str], optional): List of file extensions to be + included in the dataset (default=None). + num_samples (int, optional): The number of images to be included in the dataset + (default=None, all images). + num_shards (int, optional): Number of shards that the dataset should be divided + into (default=None). + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument should be specified only when num_shards is also specified. """ @check_celebadataset @@ -4542,6 +4542,24 @@ class CLUEDataset(SourceDataset): models, corpus and leaderboard. Here we bring in classification task of CLUE, which are AFQMC, TNEWS, IFLYTEK, CMNLI, WSC and CSL. + Citation of CLUE dataset. + + .. 
code-block:: + + @article{CLUEbenchmark, + title = {CLUE: A Chinese Language Understanding Evaluation Benchmark}, + author = {Liang Xu, Xuanwei Zhang, Lu Li, Hai Hu, Chenjie Cao, Weitang Liu, Junyi Li, Yudong Li, + Kai Sun, Yechen Xu, Yiming Cui, Cong Yu, Qianqian Dong, Yin Tian, Dian Yu, Bo Shi, Jun Zeng, + Rongzhao Wang, Weijian Xie, Yanting Li, Yina Patterson, Zuoyu Tian, Yiwen Zhang, He Zhou, + Shaoweihua Liu, Qipeng Zhao, Cong Yue, Xinrui Zhang, Zhengliang Yang, Zhenzhong Lan}, + journal = {arXiv preprint arXiv:2004.05986}, + year = {2020}, + howpublished = {https://github.com/CLUEbenchmark/CLUE}, + description = {CLUE, a Chinese Language Understanding Evaluation benchmark. It contains eight different + tasks, including single-sentence classification, sentence pair classification, and machine + reading comprehension.} + } + Args: dataset_files (str or list[str]): String or list of files to be read or glob strings to search for a pattern of files. The list will be sorted in a lexicographical order. @@ -4564,24 +4582,6 @@ class CLUEDataset(SourceDataset): shard_id (int, optional): The shard ID within num_shards (default=None). This argument should be specified only when num_shards is also specified. - Citation of CLUE dataset. - - .. code-block:: - - @article{CLUEbenchmark, - title = {CLUE: A Chinese Language Understanding Evaluation Benchmark}, - author = {Liang Xu, Xuanwei Zhang, Lu Li, Hai Hu, Chenjie Cao, Weitang Liu, Junyi Li, Yudong Li, - Kai Sun, Yechen Xu, Yiming Cui, Cong Yu, Qianqian Dong, Yin Tian, Dian Yu, Bo Shi, Jun Zeng, - Rongzhao Wang, Weijian Xie, Yanting Li, Yina Patterson, Zuoyu Tian, Yiwen Zhang, He Zhou, - Shaoweihua Liu, Qipeng Zhao, Cong Yue, Xinrui Zhang, Zhengliang Yang, Zhenzhong Lan}, - journal = {arXiv preprint arXiv:2004.05986}, - year = {2020}, - howpublished = {https://github.com/CLUEbenchmark/CLUE}, - description = {CLUE, a Chinese Language Understanding Evaluation benchmark. 
It contains eight different - tasks, including single-sentence classification, sentence pair classification, and machine - reading comprehension.} - } - Examples: >>> import mindspore.dataset as ds >>> dataset_files = ["/path/to/1", "/path/to/2"] # contains 1 or multiple text files