!1478 [Dataset] clean pylint.

pull/1478/head
Yang 5 years ago
parent c086d91aaf
commit 9b2a778d94

@ -13,8 +13,8 @@
# limitations under the License.
# ============================================================================
"""test dataset performance about mindspore.MindDataset, mindspore.TFRecordDataset, tf.data.TFRecordDataset"""
import tensorflow as tf
import time
import tensorflow as tf
import mindspore.dataset as ds
from mindspore.mindrecord import FileReader
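The reordering above follows pylint's wrong-import-order check (C0411): standard-library modules come before third-party packages. A minimal sketch of the grouping convention, with module names purely illustrative:

# 1. standard library
import os
import time

# 2. third-party packages
import numpy as np
import tensorflow as tf

# 3. project imports
import mindspore.dataset as ds
from mindspore.mindrecord import FileReader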

@ -32,9 +32,9 @@ def test_apply_generator_case():
data1 = ds.GeneratorDataset(generator_1d, ["data"])
data2 = ds.GeneratorDataset(generator_1d, ["data"])
def dataset_fn(ds):
ds = ds.repeat(2)
return ds.batch(4)
def dataset_fn(ds_):
ds_ = ds_.repeat(2)
return ds_.batch(4)
data1 = data1.apply(dataset_fn)
data2 = data2.repeat(2)
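Renaming the helper's parameter from ds to ds_ addresses pylint's redefined-outer-name warning (W0621): ds is already bound to the imported mindspore.dataset module, so any distinct parameter name clears the warning. A minimal self-contained sketch of the pattern, assuming the 64-row generator used throughout these tests:

import numpy as np
import mindspore.dataset as ds    # module-level name "ds"

def generator_1d():
    for i in range(64):
        yield (np.array([i]),)

def dataset_fn(ds_):              # "ds_" no longer shadows the imported module
    ds_ = ds_.repeat(2)
    return ds_.batch(4)

data = ds.GeneratorDataset(generator_1d, ["data"]).apply(dataset_fn)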
@ -52,11 +52,11 @@ def test_apply_imagefolder_case():
decode_op = vision.Decode()
normalize_op = vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0])
def dataset_fn(ds):
ds = ds.map(operations=decode_op)
ds = ds.map(operations=normalize_op)
ds = ds.repeat(2)
return ds
def dataset_fn(ds_):
ds_ = ds_.map(operations=decode_op)
ds_ = ds_.map(operations=normalize_op)
ds_ = ds_.repeat(2)
return ds_
data1 = data1.apply(dataset_fn)
data2 = data2.map(operations=decode_op)
@ -67,125 +67,125 @@ def test_apply_imagefolder_case():
assert np.array_equal(item1["image"], item2["image"])
def test_apply_flow_case_0(id=0):
def test_apply_flow_case_0(id_=0):
# apply control flow operations
data1 = ds.GeneratorDataset(generator_1d, ["data"])
def dataset_fn(ds):
if id == 0:
ds = ds.batch(4)
elif id == 1:
ds = ds.repeat(2)
elif id == 2:
ds = ds.batch(4)
ds = ds.repeat(2)
def dataset_fn(ds_):
if id_ == 0:
ds_ = ds_.batch(4)
elif id_ == 1:
ds_ = ds_.repeat(2)
elif id_ == 2:
ds_ = ds_.batch(4)
ds_ = ds_.repeat(2)
else:
ds = ds.shuffle(buffer_size=4)
return ds
ds_ = ds_.shuffle(buffer_size=4)
return ds_
data1 = data1.apply(dataset_fn)
num_iter = 0
for _ in data1.create_dict_iterator():
num_iter = num_iter + 1
if id == 0:
if id_ == 0:
assert num_iter == 16
elif id == 1:
elif id_ == 1:
assert num_iter == 128
elif id == 2:
elif id_ == 2:
assert num_iter == 32
else:
assert num_iter == 64
def test_apply_flow_case_1(id=1):
def test_apply_flow_case_1(id_=1):
# apply control flow operations
data1 = ds.GeneratorDataset(generator_1d, ["data"])
def dataset_fn(ds):
if id == 0:
ds = ds.batch(4)
elif id == 1:
ds = ds.repeat(2)
elif id == 2:
ds = ds.batch(4)
ds = ds.repeat(2)
def dataset_fn(ds_):
if id_ == 0:
ds_ = ds_.batch(4)
elif id_ == 1:
ds_ = ds_.repeat(2)
elif id_ == 2:
ds_ = ds_.batch(4)
ds_ = ds_.repeat(2)
else:
ds = ds.shuffle(buffer_size=4)
return ds
ds_ = ds_.shuffle(buffer_size=4)
return ds_
data1 = data1.apply(dataset_fn)
num_iter = 0
for _ in data1.create_dict_iterator():
num_iter = num_iter + 1
if id == 0:
if id_ == 0:
assert num_iter == 16
elif id == 1:
elif id_ == 1:
assert num_iter == 128
elif id == 2:
elif id_ == 2:
assert num_iter == 32
else:
assert num_iter == 64
def test_apply_flow_case_2(id=2):
def test_apply_flow_case_2(id_=2):
# apply control flow operations
data1 = ds.GeneratorDataset(generator_1d, ["data"])
def dataset_fn(ds):
if id == 0:
ds = ds.batch(4)
elif id == 1:
ds = ds.repeat(2)
elif id == 2:
ds = ds.batch(4)
ds = ds.repeat(2)
def dataset_fn(ds_):
if id_ == 0:
ds_ = ds_.batch(4)
elif id_ == 1:
ds_ = ds_.repeat(2)
elif id_ == 2:
ds_ = ds_.batch(4)
ds_ = ds_.repeat(2)
else:
ds = ds.shuffle(buffer_size=4)
return ds
ds_ = ds_.shuffle(buffer_size=4)
return ds_
data1 = data1.apply(dataset_fn)
num_iter = 0
for _ in data1.create_dict_iterator():
num_iter = num_iter + 1
if id == 0:
if id_ == 0:
assert num_iter == 16
elif id == 1:
elif id_ == 1:
assert num_iter == 128
elif id == 2:
elif id_ == 2:
assert num_iter == 32
else:
assert num_iter == 64
def test_apply_flow_case_3(id=3):
def test_apply_flow_case_3(id_=3):
# apply control flow operations
data1 = ds.GeneratorDataset(generator_1d, ["data"])
def dataset_fn(ds):
if id == 0:
ds = ds.batch(4)
elif id == 1:
ds = ds.repeat(2)
elif id == 2:
ds = ds.batch(4)
ds = ds.repeat(2)
def dataset_fn(ds_):
if id_ == 0:
ds_ = ds_.batch(4)
elif id_ == 1:
ds_ = ds_.repeat(2)
elif id_ == 2:
ds_ = ds_.batch(4)
ds_ = ds_.repeat(2)
else:
ds = ds.shuffle(buffer_size=4)
return ds
ds_ = ds_.shuffle(buffer_size=4)
return ds_
data1 = data1.apply(dataset_fn)
num_iter = 0
for _ in data1.create_dict_iterator():
num_iter = num_iter + 1
if id == 0:
if id_ == 0:
assert num_iter == 16
elif id == 1:
elif id_ == 1:
assert num_iter == 128
elif id == 2:
elif id_ == 2:
assert num_iter == 32
else:
assert num_iter == 64
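The id to id_ rename in these four test cases addresses pylint's redefined-builtin warning (W0622): id is a Python builtin, and shadowing it makes the builtin unreachable inside the function. A minimal standalone sketch of the hazard:

def lookup(id):            # pylint: redefined-builtin (W0622)
    return id              # the builtin id() is shadowed inside this function

def lookup_fixed(id_=0):   # a trailing underscore keeps the builtin reachable
    print(id(id_))         # builtin id() still works here
    return id_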
@ -195,11 +195,11 @@ def test_apply_exception_case():
# apply exception operations
data1 = ds.GeneratorDataset(generator_1d, ["data"])
def dataset_fn(ds):
ds = ds.repeat(2)
return ds.batch(4)
def dataset_fn(ds_):
ds_ = ds_.repeat(2)
return ds_.batch(4)
def exception_fn(ds):
def exception_fn():
return np.array([[0], [1], [3], [4], [5]])
try:
@ -220,12 +220,12 @@ def test_apply_exception_case():
try:
data2 = data1.apply(dataset_fn)
data3 = data1.apply(dataset_fn)
_ = data1.apply(dataset_fn)
for _, _ in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
pass
assert False
except ValueError:
pass
except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
if __name__ == '__main__':
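This hunk bundles three common cleanups: exception_fn drops a parameter it never used (W0613), the second apply result is bound to _ instead of an unused name, and the caught ValueError is logged rather than silently passed. A minimal sketch of the logging pattern, reusing the logger import these tests already rely on:

import numpy as np
from mindspore import log as logger

def exception_fn():        # no unused "ds" parameter
    return np.array([[0], [1], [3], [4], [5]])

try:
    raise ValueError("illustrative dataset error")
except ValueError as e:    # log the exception instead of passing silently
    logger.info("Got an exception in DE: {}".format(str(e)))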

@ -58,7 +58,7 @@ def test_auto_contrast(plot=False):
ds_original = ds_original.batch(512)
for idx, (image, label) in enumerate(ds_original):
for idx, (image, _) in enumerate(ds_original):
if idx == 0:
images_original = np.transpose(image, (0, 2, 3, 1))
else:
@ -79,7 +79,7 @@ def test_auto_contrast(plot=False):
ds_auto_contrast = ds_auto_contrast.batch(512)
for idx, (image, label) in enumerate(ds_auto_contrast):
for idx, (image, _) in enumerate(ds_auto_contrast):
if idx == 0:
images_auto_contrast = np.transpose(image, (0, 2, 3, 1))
else:
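Replacing label with _ in the tuple unpacking silences pylint's unused-variable warning (W0612) while keeping the loop structure intact; the same change recurs in the equalize, invert, and random_color tests below. A minimal sketch with stand-in data (shapes illustrative only):

import numpy as np

# stand-in (image, label) batches
batches = [(np.zeros((2, 3, 4, 4)), np.zeros(2)) for _ in range(3)]

for idx, (image, _) in enumerate(batches):   # the label is unused, so bind it to "_"
    images = np.transpose(image, (0, 2, 3, 1))
    print(idx, images.shape)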

@ -273,7 +273,7 @@ def test_batch_exception_01():
data1 = data1.batch(batch_size=2, drop_remainder=True, num_parallel_workers=0)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "num_parallel_workers" in str(e)
@ -290,7 +290,7 @@ def test_batch_exception_02():
data1 = data1.batch(3, drop_remainder=True, num_parallel_workers=-1)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "num_parallel_workers" in str(e)
@ -307,7 +307,7 @@ def test_batch_exception_03():
data1 = data1.batch(batch_size=0)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "batch_size" in str(e)
@ -324,7 +324,7 @@ def test_batch_exception_04():
data1 = data1.batch(batch_size=-1)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "batch_size" in str(e)
@ -341,7 +341,7 @@ def test_batch_exception_05():
data1 = data1.batch(batch_size=False)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "batch_size" in str(e)
@ -358,7 +358,7 @@ def test_batch_exception_07():
data1 = data1.batch(3, drop_remainder=0)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "drop_remainder" in str(e)
@ -375,7 +375,7 @@ def test_batch_exception_08():
data1 = data1.batch(3, drop_remainder=True, num_parallel_workers=False)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "num_parallel_workers" in str(e)
@ -392,7 +392,7 @@ def test_batch_exception_09():
data1 = data1.batch(drop_remainder=True, num_parallel_workers=4)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "batch_size" in str(e)
@ -409,7 +409,7 @@ def test_batch_exception_10():
data1 = data1.batch(batch_size=4, num_parallel_workers=8192)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "num_parallel_workers" in str(e)
@ -429,7 +429,7 @@ def test_batch_exception_11():
data1 = data1.batch(batch_size, num_parallel_workers)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "drop_remainder" in str(e)
@ -450,7 +450,7 @@ def test_batch_exception_12():
data1 = data1.batch(drop_remainder, batch_size=batch_size)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "batch_size" in str(e)
@ -469,7 +469,7 @@ def test_batch_exception_13():
data1 = data1.batch(batch_size, shard_id=1)
sum([1 for _ in data1])
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "shard_id" in str(e)

@ -24,18 +24,18 @@ from mindspore import log as logger
# In generator dataset: Number of rows is 3; its values are 0, 1, 2
def generator():
for i in range(3):
yield np.array([i]),
yield (np.array([i]),)
# In generator_10 dataset: Number of rows is 7; its values are 3, 4, 5 ... 9
def generator_10():
for i in range(3, 10):
yield np.array([i]),
yield (np.array([i]),)
# In generator_20 dataset: Number of rows is 10; its values are 10, 11, 12 ... 19
def generator_20():
for i in range(10, 20):
yield np.array([i]),
yield (np.array([i]),)
def test_concat_01():
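Wrapping the yielded value in parentheses does not change behaviour: yield np.array([i]), and yield (np.array([i]),) both produce a one-element tuple, but the explicit form is harder to misread as yielding a bare array. A minimal sketch:

import numpy as np

def generator():
    for i in range(3):
        yield (np.array([i]),)    # explicitly a one-element tuple per row

print(list(generator()))          # [(array([0]),), (array([1]),), (array([2]),)]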
@ -85,7 +85,7 @@ def test_concat_03():
data3 = data1 + data2
try:
for i, d in enumerate(data3):
for _, _ in enumerate(data3):
pass
assert False
except RuntimeError:
@ -104,7 +104,7 @@ def test_concat_04():
data3 = data1 + data2
try:
for i, d in enumerate(data3):
for _, _ in enumerate(data3):
pass
assert False
except RuntimeError:
@ -125,7 +125,7 @@ def test_concat_05():
data3 = data1 + data2
try:
for i, d in enumerate(data3):
for _, _ in enumerate(data3):
pass
assert False
except RuntimeError:

@ -31,7 +31,7 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
def test_basic():
"""
Test basic configuration functions
Test basic configuration functions
"""
# Save original configuration values
num_parallel_workers_original = ds.config.get_num_parallel_workers()
@ -138,7 +138,7 @@ def test_deterministic_run_fail():
for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
np.testing.assert_equal(item1["image"], item2["image"])
except BaseException as e:
except Exception as e:
# two datasets split the number out of the sequence a
logger.info("Got an exception in DE: {}".format(str(e)))
assert "Array" in str(e)
@ -157,7 +157,7 @@ def test_deterministic_run_pass():
# Save original configuration values
num_parallel_workers_original = ds.config.get_num_parallel_workers()
seed_original = ds.config.get_seed()
ds.config.set_seed(0)
ds.config.set_num_parallel_workers(1)
@ -179,7 +179,7 @@ def test_deterministic_run_pass():
try:
for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
np.testing.assert_equal(item1["image"], item2["image"])
except BaseException as e:
except Exception as e:
# two datasets both use numbers from the generated sequence "a"
logger.info("Got an exception in DE: {}".format(str(e)))
assert "Array" in str(e)
@ -344,7 +344,7 @@ def test_deterministic_python_seed_multi_thread():
try:
np.testing.assert_equal(data1_output, data2_output)
except BaseException as e:
except Exception as e:
# expect output to not match during multi-threaded execution
logger.info("Got an exception in DE: {}".format(str(e)))
assert "Array" in str(e)

@ -107,14 +107,20 @@ def test_tfrecord_shardings4(print_res=False):
assert len(result_list) == expect_length
assert set(result_list) == expect_set
check_result(sharding_config(2, 0, None, 1), 20, {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30})
check_result(sharding_config(2, 1, None, 1), 20, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40})
check_result(sharding_config(2, 0, None, 1), 20,
{11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30})
check_result(sharding_config(2, 1, None, 1), 20,
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40})
check_result(sharding_config(2, 0, 3, 1), 3, {11, 12, 21})
check_result(sharding_config(2, 1, 3, 1), 3, {1, 2, 31})
check_result(sharding_config(2, 0, 40, 1), 20, {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30})
check_result(sharding_config(2, 1, 40, 1), 20, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40})
check_result(sharding_config(2, 0, 55, 1), 20, {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30})
check_result(sharding_config(2, 1, 55, 1), 20, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40})
check_result(sharding_config(2, 0, 40, 1), 20,
{11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30})
check_result(sharding_config(2, 1, 40, 1), 20,
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40})
check_result(sharding_config(2, 0, 55, 1), 20,
{11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30})
check_result(sharding_config(2, 1, 55, 1), 20,
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40})
check_result(sharding_config(3, 0, 8, 1), 8, {32, 33, 34, 11, 12, 13, 14, 31})
check_result(sharding_config(3, 1, 8, 1), 8, {1, 2, 3, 4, 5, 6, 7, 8})
check_result(sharding_config(3, 2, 8, 1), 8, {21, 22, 23, 24, 25, 26, 27, 28})

@ -49,7 +49,7 @@ def test_textline_dataset_totext():
strs = i["text"].item().decode("utf8")
assert strs == line[count]
count += 1
assert (count == 5)
assert count == 5
# Restore configuration num_parallel_workers
ds.config.set_num_parallel_workers(original_num_parallel_workers)

@ -24,10 +24,10 @@ def test_voc_segmentation():
data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", mode="train", decode=True, shuffle=False)
num = 0
for item in data1.create_dict_iterator():
assert (item["image"].shape[0] == IMAGE_SHAPE[num])
assert (item["target"].shape[0] == TARGET_SHAPE[num])
assert item["image"].shape[0] == IMAGE_SHAPE[num]
assert item["target"].shape[0] == TARGET_SHAPE[num]
num += 1
assert (num == 10)
assert num == 10
def test_voc_detection():
@ -35,12 +35,12 @@ def test_voc_detection():
num = 0
count = [0, 0, 0, 0, 0, 0]
for item in data1.create_dict_iterator():
assert (item["image"].shape[0] == IMAGE_SHAPE[num])
assert item["image"].shape[0] == IMAGE_SHAPE[num]
for bbox in item["annotation"]:
count[bbox[0]] += 1
num += 1
assert (num == 9)
assert (count == [3, 2, 1, 2, 4, 3])
assert num == 9
assert count == [3, 2, 1, 2, 4, 3]
def test_voc_class_index():
@ -58,8 +58,8 @@ def test_voc_class_index():
assert (bbox[0] == 0 or bbox[0] == 1 or bbox[0] == 5)
count[bbox[0]] += 1
num += 1
assert (num == 6)
assert (count == [3, 2, 0, 0, 0, 3])
assert num == 6
assert count == [3, 2, 0, 0, 0, 3]
def test_voc_get_class_indexing():
@ -76,8 +76,8 @@ def test_voc_get_class_indexing():
assert (bbox[0] == 0 or bbox[0] == 1 or bbox[0] == 2 or bbox[0] == 3 or bbox[0] == 4 or bbox[0] == 5)
count[bbox[0]] += 1
num += 1
assert (num == 9)
assert (count == [3, 2, 1, 2, 4, 3])
assert num == 9
assert count == [3, 2, 1, 2, 4, 3]
def test_case_0():
@ -93,9 +93,9 @@ def test_case_0():
data1 = data1.batch(batch_size, drop_remainder=True)
num = 0
for item in data1.create_dict_iterator():
for _ in data1.create_dict_iterator():
num += 1
assert (num == 20)
assert num == 20
def test_case_1():
@ -110,9 +110,9 @@ def test_case_1():
data1 = data1.batch(batch_size, drop_remainder=True, pad_info={})
num = 0
for item in data1.create_dict_iterator():
for _ in data1.create_dict_iterator():
num += 1
assert (num == 18)
assert num == 18
def test_voc_exception():

@ -58,7 +58,7 @@ def test_equalize(plot=False):
ds_original = ds_original.batch(512)
for idx, (image, label) in enumerate(ds_original):
for idx, (image, _) in enumerate(ds_original):
if idx == 0:
images_original = np.transpose(image, (0, 2, 3, 1))
else:
@ -79,7 +79,7 @@ def test_equalize(plot=False):
ds_equalize = ds_equalize.batch(512)
for idx, (image, label) in enumerate(ds_equalize):
for idx, (image, _) in enumerate(ds_equalize):
if idx == 0:
images_equalize = np.transpose(image, (0, 2, 3, 1))
else:

@ -15,9 +15,7 @@
import numpy as np
import mindspore.common.dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as cde
DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
@ -31,7 +29,6 @@ def test_diff_predicate_func():
cde.Decode(),
cde.Resize([64, 64])
]
type_cast_op = C.TypeCast(mstype.int32)
dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image", "label"], shuffle=False)
dataset = dataset.map(input_columns=["image"], operations=transforms, num_parallel_workers=1)
dataset = dataset.filter(input_columns=["image", "label"], predicate=predicate_func, num_parallel_workers=4)
@ -40,7 +37,6 @@ def test_diff_predicate_func():
label_list = []
for data in dataset.create_dict_iterator():
num_iter += 1
ori_img = data["image"]
label = data["label"]
label_list.append(label)
assert num_iter == 1
@ -200,6 +196,7 @@ def generator_1d_zip2():
def filter_func_zip(data1, data2):
_ = data2
if data1 > 20:
return False
return True
@ -249,6 +246,7 @@ def test_filter_by_generator_with_zip_after():
def filter_func_map(col1, col2):
_ = col2
if col1[0] > 8:
return True
return False
@ -262,6 +260,7 @@ def filter_func_map_part(col1):
def filter_func_map_all(col1, col2):
_, _ = col1, col2
return True
@ -334,6 +333,7 @@ def test_filter_by_generator_with_rename():
# test input_column
def filter_func_input_column1(col1, col2):
_ = col2
if col1[0] < 8:
return True
return False
@ -346,6 +346,7 @@ def filter_func_input_column2(col1):
def filter_func_input_column3(col1):
_ = col1
return True
@ -380,6 +381,7 @@ def generator_mc_p1(maxid=20):
def filter_func_Partial_0(col1, col2, col3, col4):
_, _, _ = col2, col3, col4
filter_data = [0, 1, 2, 3, 4, 11]
if col1[0] in filter_data:
return False
@ -439,6 +441,7 @@ def test_filter_by_generator_Partial2():
def filter_func_Partial(col1, col2):
_ = col2
if col1[0] % 3 == 0:
return True
return False
@ -461,6 +464,7 @@ def test_filter_by_generator_Partial():
def filter_func_cifar(col1, col2):
_ = col1
if col2 % 3 == 0:
return True
return False
@ -490,6 +494,7 @@ def generator_sort2(maxid=20):
def filter_func_part_sort(col1, col2, col3, col4, col5, col6):
_, _, _, _, _, _ = col1, col2, col3, col4, col5, col6
return True

@ -58,7 +58,7 @@ def test_invert(plot=False):
ds_original = ds_original.batch(512)
for idx, (image, label) in enumerate(ds_original):
for idx, (image, _) in enumerate(ds_original):
if idx == 0:
images_original = np.transpose(image, (0, 2, 3, 1))
else:
@ -79,7 +79,7 @@ def test_invert(plot=False):
ds_invert = ds_invert.batch(512)
for idx, (image, label) in enumerate(ds_invert):
for idx, (image, _) in enumerate(ds_invert):
if idx == 0:
images_invert = np.transpose(image, (0, 2, 3, 1))
else:

@ -17,11 +17,11 @@ This is the test module for mindrecord
"""
import collections
import json
import numpy as np
import os
import pytest
import re
import string
import pytest
import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision
@ -46,9 +46,10 @@ def add_and_remove_cv_file():
paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
for x in paths:
os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
os.remove("{}.db".format(x)) if os.path.exists(
"{}.db".format(x)) else None
if os.path.exists("{}".format(x)):
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
data = get_data(CV_DIR_NAME)
cv_schema_json = {"id": {"type": "int32"},
@ -117,7 +118,9 @@ def add_and_remove_nlp_compress_file():
255, 256, -32768, 32767, -32769, 32768, -2147483648,
2147483647], dtype=np.int32), [-1]),
"array_b": np.reshape(np.array([0, 1, -1, 127, -128, 128, -129, 255,
256, -32768, 32767, -32769, 32768, -2147483648, 2147483647, -2147483649, 2147483649, -922337036854775808, 9223372036854775807]), [1, -1]),
256, -32768, 32767, -32769, 32768,
-2147483648, 2147483647, -2147483649, 2147483649,
-922337036854775808, 9223372036854775807]), [1, -1]),
"array_c": str.encode("nlp data"),
"array_d": np.reshape(np.array([[-10, -127], [10, 127]]), [2, -1])
})
@ -151,7 +154,9 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file):
255, 256, -32768, 32767, -32769, 32768, -2147483648,
2147483647], dtype=np.int32), [-1]),
"array_b": np.reshape(np.array([0, 1, -1, 127, -128, 128, -129, 255,
256, -32768, 32767, -32769, 32768, -2147483648, 2147483647, -2147483649, 2147483649, -922337036854775808, 9223372036854775807]), [1, -1]),
256, -32768, 32767, -32769, 32768,
-2147483648, 2147483647, -2147483649, 2147483649,
-922337036854775808, 9223372036854775807]), [1, -1]),
"array_c": str.encode("nlp data"),
"array_d": np.reshape(np.array([[-10, -127], [10, 127]]), [2, -1])
})
@ -194,9 +199,10 @@ def test_cv_minddataset_writer_tutorial():
paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
for x in paths:
os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
os.remove("{}.db".format(x)) if os.path.exists(
"{}.db".format(x)) else None
if os.path.exists("{}".format(x)):
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
data = get_data(CV_DIR_NAME)
cv_schema_json = {"file_name": {"type": "string"}, "label": {"type": "int32"},
@ -478,9 +484,10 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
for x in paths:
os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
os.remove("{}.db".format(x)) if os.path.exists(
"{}.db".format(x)) else None
if os.path.exists("{}".format(x)):
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(CV1_FILE_NAME, FILES_NUM)
data = get_data(CV_DIR_NAME)
cv_schema_json = {"id": {"type": "int32"},
@ -779,7 +786,7 @@ def get_nlp_data(dir_name, vocab_file, num):
"""
if not os.path.isdir(dir_name):
raise IOError("Directory {} not exists".format(dir_name))
for root, dirs, files in os.walk(dir_name):
for root, _, files in os.walk(dir_name):
for index, file_name_extension in enumerate(files):
if index < num:
file_path = os.path.join(root, file_name_extension)
@ -851,7 +858,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset():
if os.path.exists("{}".format(mindrecord_file_name)):
os.remove("{}".format(mindrecord_file_name))
if os.path.exists("{}.db".format(mindrecord_file_name)):
os.remove("{}.db".format(x))
os.remove("{}.db".format(mindrecord_file_name))
data = [{"file_name": "001.jpg", "label": 4,
"image1": bytes("image1 bytes abc", encoding='UTF-8'),
"image2": bytes("image1 bytes def", encoding='UTF-8'),

@ -26,8 +26,10 @@ CV1_FILE_NAME = "./imagenet1.mindrecord"
def create_cv_mindrecord(files_num):
"""tutorial for cv dataset writer."""
os.remove(CV_FILE_NAME) if os.path.exists(CV_FILE_NAME) else None
os.remove("{}.db".format(CV_FILE_NAME)) if os.path.exists("{}.db".format(CV_FILE_NAME)) else None
if os.path.exists(CV_FILE_NAME):
os.remove(CV_FILE_NAME)
if os.path.exists("{}.db".format(CV_FILE_NAME)):
os.remove("{}.db".format(CV_FILE_NAME))
writer = FileWriter(CV_FILE_NAME, files_num)
cv_schema_json = {"file_name": {"type": "string"}, "label": {"type": "int32"}, "data": {"type": "bytes"}}
data = [{"file_name": "001.jpg", "label": 43, "data": bytes('0xffsafdafda', encoding='utf-8')}]
@ -39,8 +41,10 @@ def create_cv_mindrecord(files_num):
def create_diff_schema_cv_mindrecord(files_num):
"""tutorial for cv dataset writer."""
os.remove(CV1_FILE_NAME) if os.path.exists(CV1_FILE_NAME) else None
os.remove("{}.db".format(CV1_FILE_NAME)) if os.path.exists("{}.db".format(CV1_FILE_NAME)) else None
if os.path.exists(CV1_FILE_NAME):
os.remove(CV1_FILE_NAME)
if os.path.exists("{}.db".format(CV1_FILE_NAME)):
os.remove("{}.db".format(CV1_FILE_NAME))
writer = FileWriter(CV1_FILE_NAME, files_num)
cv_schema_json = {"file_name_1": {"type": "string"}, "label": {"type": "int32"}, "data": {"type": "bytes"}}
data = [{"file_name_1": "001.jpg", "label": 43, "data": bytes('0xffsafdafda', encoding='utf-8')}]
@ -52,8 +56,10 @@ def create_diff_schema_cv_mindrecord(files_num):
def create_diff_page_size_cv_mindrecord(files_num):
"""tutorial for cv dataset writer."""
os.remove(CV1_FILE_NAME) if os.path.exists(CV1_FILE_NAME) else None
os.remove("{}.db".format(CV1_FILE_NAME)) if os.path.exists("{}.db".format(CV1_FILE_NAME)) else None
if os.path.exists(CV1_FILE_NAME):
os.remove(CV1_FILE_NAME)
if os.path.exists("{}.db".format(CV1_FILE_NAME)):
os.remove("{}.db".format(CV1_FILE_NAME))
writer = FileWriter(CV1_FILE_NAME, files_num)
writer.set_page_size(1 << 26) # 64MB
cv_schema_json = {"file_name": {"type": "string"}, "label": {"type": "int32"}, "data": {"type": "bytes"}}
@ -69,8 +75,8 @@ def test_cv_lack_json():
create_cv_mindrecord(1)
columns_list = ["data", "file_name", "label"]
num_readers = 4
with pytest.raises(Exception) as err:
data_set = ds.MindDataset(CV_FILE_NAME, "no_exist.json", columns_list, num_readers)
with pytest.raises(Exception):
ds.MindDataset(CV_FILE_NAME, "no_exist.json", columns_list, num_readers)
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
@ -80,7 +86,7 @@ def test_cv_lack_mindrecord():
columns_list = ["data", "file_name", "label"]
num_readers = 4
with pytest.raises(Exception, match="does not exist or permission denied"):
data_set = ds.MindDataset("no_exist.mindrecord", columns_list, num_readers)
_ = ds.MindDataset("no_exist.mindrecord", columns_list, num_readers)
def test_invalid_mindrecord():
@ -134,7 +140,7 @@ def test_cv_minddataset_pk_sample_exclusive_shuffle():
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers,
sampler=sampler, shuffle=False)
num_iter = 0
for item in data_set.create_dict_iterator():
for _ in data_set.create_dict_iterator():
num_iter += 1
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
@ -149,7 +155,7 @@ def test_cv_minddataset_reader_different_schema():
data_set = ds.MindDataset([CV_FILE_NAME, CV1_FILE_NAME], columns_list,
num_readers)
num_iter = 0
for item in data_set.create_dict_iterator():
for _ in data_set.create_dict_iterator():
num_iter += 1
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
@ -166,7 +172,7 @@ def test_cv_minddataset_reader_different_page_size():
data_set = ds.MindDataset([CV_FILE_NAME, CV1_FILE_NAME], columns_list,
num_readers)
num_iter = 0
for item in data_set.create_dict_iterator():
for _ in data_set.create_dict_iterator():
num_iter += 1
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
@ -181,7 +187,7 @@ def test_minddataset_invalidate_num_shards():
with pytest.raises(Exception, match="shard_id is invalid, "):
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 0, 1)
num_iter = 0
for item in data_set.create_dict_iterator():
for _ in data_set.create_dict_iterator():
num_iter += 1
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
@ -194,7 +200,7 @@ def test_minddataset_invalidate_shard_id():
with pytest.raises(Exception, match="shard_id is invalid, "):
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 1, -1)
num_iter = 0
for item in data_set.create_dict_iterator():
for _ in data_set.create_dict_iterator():
num_iter += 1
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
@ -207,13 +213,13 @@ def test_minddataset_shard_id_bigger_than_num_shard():
with pytest.raises(Exception, match="shard_id is invalid, "):
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 2)
num_iter = 0
for item in data_set.create_dict_iterator():
for _ in data_set.create_dict_iterator():
num_iter += 1
with pytest.raises(Exception, match="shard_id is invalid, "):
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 5)
num_iter = 0
for item in data_set.create_dict_iterator():
for _ in data_set.create_dict_iterator():
num_iter += 1
os.remove(CV_FILE_NAME)

@ -50,7 +50,7 @@ def test_cv_minddataset_reader_multi_image_and_ndarray_tutorial():
assert os.path.exists(CV_FILE_NAME)
assert os.path.exists(CV_FILE_NAME + ".db")
"""tutorial for minderdataset."""
# tutorial for minderdataset.
columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
num_readers = 1
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)

@ -20,7 +20,6 @@ import pytest
import mindspore.dataset as ds
from mindspore import log as logger
from mindspore.dataset.transforms.vision import Inter
from mindspore.dataset.text import to_str
from mindspore.mindrecord import FileWriter

@ -39,7 +39,7 @@ def test_on_tokenized_line():
res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14],
[11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32)
for i, d in enumerate(data.create_dict_iterator()):
np.testing.assert_array_equal(d["text"], res[i]), i
_ = (np.testing.assert_array_equal(d["text"], res[i]), i)
if __name__ == '__main__':
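The old line here was a bare tuple expression: the assert ran, but the trailing ", i" did nothing, so pylint flagged the statement. Assigning the tuple to _ silences the warning; an arguably cleaner alternative is to put the index into the failure message, sketched below with illustrative data:

import numpy as np

expected = np.array([[10, 1, 11, 1, 12], [11, 1, 12, 1, 10]], dtype=np.int32)
actual = expected.copy()

for i, row in enumerate(actual):
    # the row index goes into the failure message instead of a dangling ", i"
    np.testing.assert_array_equal(row, expected[i], err_msg="row {}".format(i))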

@ -199,7 +199,7 @@ def test_jieba_5():
def gen():
text = np.array("今天天气太好了我们一起去外面玩吧".encode("UTF8"), dtype='S')
yield text,
yield (text,)
def pytoken_op(input_data):

@ -109,10 +109,9 @@ def test_decode_op():
data1 = data1.map(input_columns=["image"], operations=decode_op)
num_iter = 0
image = None
for item in data1.create_dict_iterator():
logger.info("Looping inside iterator {}".format(num_iter))
image = item["image"]
_ = item["image"]
# plt.subplot(131)
# plt.imshow(image)
# plt.title("DE image")
@ -134,10 +133,9 @@ def test_decode_normalize_op():
data1 = data1.map(input_columns=["image"], operations=[decode_op, normalize_op])
num_iter = 0
image = None
for item in data1.create_dict_iterator():
logger.info("Looping inside iterator {}".format(num_iter))
image = item["image"]
_ = item["image"]
# plt.subplot(131)
# plt.imshow(image)
# plt.title("DE image")

@ -37,8 +37,7 @@ def test_case_0():
data1 = data1.batch(2)
i = 0
for item in data1.create_dict_iterator(): # each data is a dictionary
for _ in data1.create_dict_iterator(): # each data is a dictionary
pass

@ -72,7 +72,7 @@ def test_pad_op():
# pylint: disable=unnecessary-lambda
def test_pad_grayscale():
"""
Tests that the pad works for grayscale images
Tests that the pad works for grayscale images
"""
def channel_swap(image):
@ -92,7 +92,7 @@ def test_pad_grayscale():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=transform())
# if input is grayscale, the output dimensions should be single channel
# if input is grayscale, the output dimensions should be single channel
pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20))
data1 = data1.map(input_columns=["image"], operations=pad_gray)
dataset_shape_1 = []
@ -100,11 +100,11 @@ def test_pad_grayscale():
c_image = item1["image"]
dataset_shape_1.append(c_image.shape)
# Dataset for comparison
# Dataset for comparison
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = c_vision.Decode()
# we use the same padding logic
# we use the same padding logic
ctrans = [decode_op, pad_gray]
dataset_shape_2 = []

@ -119,7 +119,7 @@ def batch_padding_performance_3d():
num_batches = 0
for _ in data1.create_dict_iterator():
num_batches += 1
res = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
_ = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
# print(res)
@ -135,7 +135,7 @@ def batch_padding_performance_1d():
num_batches = 0
for _ in data1.create_dict_iterator():
num_batches += 1
res = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
_ = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
# print(res)
@ -151,7 +151,7 @@ def batch_pyfunc_padding_3d():
num_batches = 0
for _ in data1.create_dict_iterator():
num_batches += 1
res = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
_ = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
# print(res)
@ -166,7 +166,7 @@ def batch_pyfunc_padding_1d():
num_batches = 0
for _ in data1.create_dict_iterator():
num_batches += 1
res = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
_ = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
# print(res)

@ -58,7 +58,7 @@ def test_random_color(degrees=(0.1, 1.9), plot=False):
ds_original = ds_original.batch(512)
for idx, (image, label) in enumerate(ds_original):
for idx, (image, _) in enumerate(ds_original):
if idx == 0:
images_original = np.transpose(image, (0, 2, 3, 1))
else:
@ -79,7 +79,7 @@ def test_random_color(degrees=(0.1, 1.9), plot=False):
ds_random_color = ds_random_color.batch(512)
for idx, (image, label) in enumerate(ds_random_color):
for idx, (image, _) in enumerate(ds_random_color):
if idx == 0:
images_random_color = np.transpose(image, (0, 2, 3, 1))
else:

@ -256,7 +256,7 @@ def test_random_color_adjust_op_hue(plot=False):
# pylint: disable=unnecessary-lambda
def test_random_color_adjust_grayscale():
"""
Tests that the random color adjust works for grayscale images
Tests that the random color adjust works for grayscale images
"""
def channel_swap(image):
@ -284,7 +284,7 @@ def test_random_color_adjust_grayscale():
for item1 in data1.create_dict_iterator():
c_image = item1["image"]
dataset_shape_1.append(c_image.shape)
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))

@ -200,7 +200,7 @@ def test_random_crop_04_c():
for item in data.create_dict_iterator():
image = item["image"]
image_list.append(image.shape)
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
def test_random_crop_04_py():
@ -227,7 +227,7 @@ def test_random_crop_04_py():
for item in data.create_dict_iterator():
image = (item["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
image_list.append(image.shape)
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
def test_random_crop_05_c():
@ -439,7 +439,7 @@ def test_random_crop_09():
for item in data.create_dict_iterator():
image = item["image"]
image_list.append(image.shape)
except BaseException as e:
except Exception as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "should be PIL Image" in str(e)

Some files were not shown because too many files have changed in this diff.