!2594 dataset: Fixup docs; remove pylint disable stmts in UT

Merge pull request !2594 from cathwong/ckw_dataset_fixes
5 years ago · 74f23f8e0d
parent c6c5fa8406 277aba5326
commit 74f23f8e0d
8 changed files with 39 additions and 73 deletions
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -1040,7 +1040,7 @@ class Dataset:
        Args:
            columns (list[str], optional): List of columns to be used to specify the order of columns
-                (defaults=None, means all columns).
+                (default=None, means all columns).
        Returns:
            Iterator, list of ndarray.
@@ -3382,7 +3382,7 @@ class ManifestDataset(MappableDataset):
        class_indexing (dict, optional): A str-to-int mapping from label name to index
            (default=None, the folder names will be sorted alphabetically and each
            class will be given a unique index starting from 0).
-        decode (bool, optional): decode the images after reading (defaults=False).
+        decode (bool, optional): decode the images after reading (default=False).
        num_shards (int, optional): Number of shards that the dataset should be divided
            into (default=None).
        shard_id (int, optional): The shard ID within num_shards (default=None). This
@@ -4760,7 +4760,7 @@ class _NumpySlicesDataset:
    def process_dict(self, input_data):
        """
-        Convert the dict like data into tuple format, when input is a tuple of dict then compose it into a dict first.
+        Convert the dict like data into tuple format, when input is a tuple of dicts then compose it into a dict first.
        """
        # Convert pandas like dict(has "values" column) into General dict
        data_keys = list(input_data.keys())
--- a/mindspore/dataset/transforms/vision/c_transforms.py
+++ b/mindspore/dataset/transforms/vision/c_transforms.py
@@ -202,7 +202,7 @@ class RandomHorizontalFlip(cde.RandomHorizontalFlipOp):
    Flip the input image horizontally, randomly with a given probability.
    Args:
-        prob (float): Probability of the image being flipped (default=0.5).
+        prob (float, optional): Probability of the image being flipped (default=0.5).
    """
    @check_prob
@@ -217,7 +217,7 @@ class RandomHorizontalFlipWithBBox(cde.RandomHorizontalFlipWithBBoxOp):
    Maintains data integrity by also flipping bounding boxes in an object detection pipeline.
    Args:
-        prob (float): Probability of the image being flipped (default=0.5).
+        prob (float, optional): Probability of the image being flipped (default=0.5).
    """
    @check_prob
@@ -231,7 +231,7 @@ class RandomVerticalFlip(cde.RandomVerticalFlipOp):
    Flip the input image vertically, randomly with a given probability.
    Args:
-        prob (float): Probability of the image being flipped (default=0.5).
+        prob (float, optional): Probability of the image being flipped (default=0.5).
    """
    @check_prob
--- a/tests/ut/data/dataset/declient.cfg
+++ b/tests/ut/data/dataset/declient.cfg
@@ -4,6 +4,7 @@
   "numParallelWorkers": 4,
   "workerConnectorSize": 16,
   "opConnectorSize": 16,
-   "seed": 5489
+   "seed": 5489,
+   "monitor_sampling_interval": 15
 }
--- a/tests/ut/python/dataset/test_batch.py
+++ b/tests/ut/python/dataset/test_batch.py
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from util import save_and_check
 import mindspore.dataset as ds
 from mindspore import log as logger
+from util import save_and_check
 # Note: Number of rows in test.data dataset:  12
 DATA_DIR = ["../data/dataset/testTFTestAllTypes/test.data"]
@@ -434,7 +433,6 @@ def test_batch_exception_11():
        assert "drop_remainder" in str(e)
-# pylint: disable=redundant-keyword-arg
 def test_batch_exception_12():
    """
    Test batch exception: wrong input order, drop_remainder wrongly used as batch_size
@@ -447,12 +445,12 @@ def test_batch_exception_12():
    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR)
    try:
-        data1 = data1.batch(drop_remainder, batch_size=batch_size)
+        data1 = data1.batch(drop_remainder, batch_size)
        sum([1 for _ in data1])
    except Exception as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
-        assert "batch_size" in str(e)
+        assert "drop_remainder" in str(e)
 def test_batch_exception_13():
--- a/tests/ut/python/dataset/test_center_crop.py
+++ b/tests/ut/python/dataset/test_center_crop.py
@@ -109,23 +109,18 @@ def test_center_crop_comp(height=375, width=375, plot=False):
        visualize_list(image_c_cropped, image_py_cropped, visualize_mode=2)
-# pylint: disable=unnecessary-lambda
 def test_crop_grayscale(height=375, width=375):
    """
    Test that centercrop works with pad and grayscale images
    """
-    def channel_swap(image):
-        """
-        Py func hack for our pytransforms to work with c transforms
-        """
-        return (image.transpose(1, 2, 0) * 255).astype(np.uint8)
+    # Note: image.transpose performs channel swap to allow py transforms to
+    # work with c transforms
    transforms = [
        py_vision.Decode(),
        py_vision.Grayscale(1),
        py_vision.ToTensor(),
-        (lambda image: channel_swap(image))
+        (lambda image: (image.transpose(1, 2, 0) * 255).astype(np.uint8))
    ]
    transform = py_vision.ComposeOp(transforms)
--- a/tests/ut/python/dataset/test_config.py
+++ b/tests/ut/python/dataset/test_config.py
@@ -37,6 +37,7 @@ def test_basic():
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    prefetch_size_original = ds.config.get_prefetch_size()
    seed_original = ds.config.get_seed()
+    monitor_sampling_interval_original = ds.config.get_monitor_sampling_interval()
    ds.config.load('../data/dataset/declient.cfg')
@@ -45,23 +46,27 @@ def test_basic():
    # assert ds.config.get_worker_connector_size() == 16
    assert ds.config.get_prefetch_size() == 16
    assert ds.config.get_seed() == 5489
+    # assert ds.config.get_monitor_sampling_interval() == 15
    # ds.config.set_rows_per_buffer(1)
    ds.config.set_num_parallel_workers(2)
    # ds.config.set_worker_connector_size(3)
    ds.config.set_prefetch_size(4)
    ds.config.set_seed(5)
+    ds.config.set_monitor_sampling_interval(45)
    # assert ds.config.get_rows_per_buffer() == 1
    assert ds.config.get_num_parallel_workers() == 2
    # assert ds.config.get_worker_connector_size() == 3
    assert ds.config.get_prefetch_size() == 4
    assert ds.config.get_seed() == 5
+    assert ds.config.get_monitor_sampling_interval() == 45
    # Restore original configuration values
    ds.config.set_num_parallel_workers(num_parallel_workers_original)
    ds.config.set_prefetch_size(prefetch_size_original)
    ds.config.set_seed(seed_original)
+    ds.config.set_monitor_sampling_interval(monitor_sampling_interval_original)
 def test_get_seed():
@@ -150,7 +155,7 @@ def test_deterministic_run_fail():
 def test_deterministic_run_pass():
    """
-    Test deterministic run with with setting the seed
+    Test deterministic run with setting the seed
    """
    logger.info("test_deterministic_run_pass")
--- a/tests/ut/python/dataset/test_filterop.py
+++ b/tests/ut/python/dataset/test_filterop.py
@@ -50,9 +50,7 @@ def test_diff_predicate_func():
 def filter_func_ge(data):
-    if data > 10:
-        return False
-    return True
+    return data <= 10
 def generator_1d():
@@ -108,15 +106,11 @@ def test_filter_by_generator_with_repeat_after():
 def filter_func_batch(data):
-    if data[0] > 8:
-        return False
-    return True
+    return data[0] <= 8
 def filter_func_batch_after(data):
-    if data > 20:
-        return False
-    return True
+    return data <= 20
 # test with batchOp before
@@ -152,9 +146,7 @@ def test_filter_by_generator_with_batch_after():
 def filter_func_shuffle(data):
-    if data > 20:
-        return False
-    return True
+    return data <= 20
 # test with batchOp before
@@ -169,9 +161,7 @@ def test_filter_by_generator_with_shuffle():
 def filter_func_shuffle_after(data):
-    if data > 20:
-        return False
-    return True
+    return data <= 20
 # test with batchOp after
@@ -197,15 +187,11 @@ def generator_1d_zip2():
 def filter_func_zip(data1, data2):
    _ = data2
-    if data1 > 20:
-        return False
-    return True
+    return data1 <= 20
 def filter_func_zip_after(data1):
-    if data1 > 20:
-        return False
-    return True
+    return data1 <= 20
 # test with zipOp before
@@ -247,16 +233,11 @@ def test_filter_by_generator_with_zip_after():
 def filter_func_map(col1, col2):
    _ = col2
-    if col1[0] > 8:
-        return True
-    return False
+    return col1[0] > 8
-# pylint: disable=simplifiable-if-statement
 def filter_func_map_part(col1):
-    if col1 < 3:
-        return True
-    return False
+    return col1 < 3
 def filter_func_map_all(col1, col2):
@@ -311,9 +292,7 @@ def test_filter_by_generator_with_map_part_col():
 def filter_func_rename(data):
-    if data > 8:
-        return True
-    return False
+    return data > 8
 # test with  rename before
@@ -334,15 +313,11 @@ def test_filter_by_generator_with_rename():
 # test input_column
 def filter_func_input_column1(col1, col2):
    _ = col2
-    if col1[0] < 8:
-        return True
-    return False
+    return col1[0] < 8
 def filter_func_input_column2(col1):
-    if col1[0] < 8:
-        return True
-    return False
+    return col1[0] < 8
 def filter_func_input_column3(col1):
@@ -439,9 +414,7 @@ def test_filter_by_generator_Partial2():
 def filter_func_Partial(col1, col2):
    _ = col2
-    if col1[0] % 3 == 0:
-        return True
-    return False
+    return col1[0] % 3 == 0
 def generator_big(maxid=20):
@@ -461,9 +434,7 @@ def test_filter_by_generator_Partial():
 def filter_func_cifar(col1, col2):
    _ = col1
-    if col2 % 3 == 0:
-        return True
-    return False
+    return col2 % 3 == 0
 # test with  cifar10
--- a/tests/ut/python/dataset/test_pad.py
+++ b/tests/ut/python/dataset/test_pad.py
@@ -16,12 +16,12 @@
 Testing Pad op in DE
 """
 import numpy as np
-from util import diff_mse
 import mindspore.dataset as ds
 import mindspore.dataset.transforms.vision.c_transforms as c_vision
 import mindspore.dataset.transforms.vision.py_transforms as py_vision
 from mindspore import log as logger
+from util import diff_mse
 DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
 SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
@@ -69,23 +69,19 @@ def test_pad_op():
        assert mse < 0.01
-# pylint: disable=unnecessary-lambda
+
 def test_pad_grayscale():
    """
    Tests that the pad works for grayscale images
    """
-    def channel_swap(image):
-        """
-        Py func hack for our pytransforms to work with c transforms
-        """
-        return (image.transpose(1, 2, 0) * 255).astype(np.uint8)
+    # Note: image.transpose performs channel swap to allow py transforms to
+    # work with c transforms
    transforms = [
        py_vision.Decode(),
        py_vision.Grayscale(1),
        py_vision.ToTensor(),
-        (lambda image: channel_swap(image))
+        (lambda image: (image.transpose(1, 2, 0) * 255).astype(np.uint8))
    ]
    transform = py_vision.ComposeOp(transforms)