diff --git a/mindspore/dataset/text/transforms.py b/mindspore/dataset/text/transforms.py
index a6a74d47a5..31eae6d951 100644
--- a/mindspore/dataset/text/transforms.py
+++ b/mindspore/dataset/text/transforms.py
@@ -282,8 +282,9 @@ class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp):
         >>> # If with_offsets=True, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
         >>> #                                                   ["offsets_limit", dtype=uint32]}
         >>> tokenizer_op = text.UnicodeCharTokenizer(True)
-        >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-        >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+        >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+        >>>                 output_columns=["token", "offsets_start", "offsets_limit"],
+        >>>                 column_order=["token", "offsets_start", "offsets_limit"])
     """

     @check_with_offsets
@@ -313,8 +314,9 @@ class WordpieceTokenizer(cde.WordpieceTokenizerOp):
         >>> #                                                   ["offsets_limit", dtype=uint32]}
         >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token=['UNK'],
         >>>                                        max_bytes_per_token=100, with_offsets=True)
-        >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-        >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+        >>> data = data.map(operations=tokenizer_op,
+        >>>                 input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
+        >>>                 column_order=["token", "offsets_start", "offsets_limit"])
     """

     @check_wordpiece_tokenizer
@@ -378,8 +380,9 @@ if platform.system().lower() != 'windows':
             >>> #                                           ["offsets_start", dtype=uint32],
             >>> #                                           ["offsets_limit", dtype=uint32]}
             >>> tokenizer_op = text.WhitespaceTokenizer(True)
-            >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-            >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+            >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+            >>>                 output_columns=["token", "offsets_start", "offsets_limit"],
+            >>>                 column_order=["token", "offsets_start", "offsets_limit"])
         """

         @check_with_offsets
@@ -404,8 +407,9 @@ if platform.system().lower() != 'windows':
             >>> #                                           ["offsets_start", dtype=uint32],
             >>> #                                           ["offsets_limit", dtype=uint32]}
             >>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True)
-            >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-            >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+            >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+            >>>                 output_columns=["token", "offsets_start", "offsets_limit"],
+            >>>                 column_order=["token", "offsets_start", "offsets_limit"])
         """

         @check_unicode_script_tokenizer
@@ -497,8 +501,9 @@ if platform.system().lower() != 'windows':
             >>> #                                           ["offsets_start", dtype=uint32],
             >>> #                                           ["offsets_limit", dtype=uint32]}
             >>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True)
-            >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-            >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+            >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+            >>>                 output_columns=["token", "offsets_start", "offsets_limit"],
+            >>>                 column_order=["token", "offsets_start", "offsets_limit"])
         """

         @check_regex_tokenizer
@@ -540,8 +545,9 @@ if platform.system().lower() != 'windows':
             >>>                                   normalization_form=NormalizeForm.NONE,
             >>>                                   preserve_unused_token=True,
             >>>                                   with_offsets=True)
-            >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-            >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+            >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+            >>>                 output_columns=["token", "offsets_start", "offsets_limit"],
+            >>>                 column_order=["token", "offsets_start", "offsets_limit"])
         """

         @check_basic_tokenizer
@@ -593,8 +599,9 @@ if platform.system().lower() != 'windows':
             >>>                                  unknown_token=100, lower_case=False, keep_whitespace=False,
             >>>                                  normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
             >>>                                  with_offsets=True)
-            >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-            >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+            >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+            >>>                 output_columns=["token", "offsets_start", "offsets_limit"],
+            >>>                 column_order=["token", "offsets_start", "offsets_limit"])
         """

         @check_bert_tokenizer
diff --git a/model_zoo/official/cv/alexnet/src/dataset.py b/model_zoo/official/cv/alexnet/src/dataset.py
index 688b6fa672..65808f9b67 100644
--- a/model_zoo/official/cv/alexnet/src/dataset.py
+++ b/model_zoo/official/cv/alexnet/src/dataset.py
@@ -39,14 +39,14 @@ def create_dataset_cifar10(data_path, batch_size=32, repeat_size=1, status="trai
     random_horizontal_op = CV.RandomHorizontalFlip()
     channel_swap_op = CV.HWC2CHW()
     typecast_op = C.TypeCast(mstype.int32)
-    cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op)
+    cifar_ds = cifar_ds.map(operations=typecast_op, input_columns="label")
     if status == "train":
-        cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op)
-        cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op)
-    cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op)
-    cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op)
-    cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op)
-    cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op)
+        cifar_ds = cifar_ds.map(operations=random_crop_op, input_columns="image")
+        cifar_ds = cifar_ds.map(operations=random_horizontal_op, input_columns="image")
+    cifar_ds = cifar_ds.map(operations=resize_op, input_columns="image")
+    cifar_ds = cifar_ds.map(operations=rescale_op, input_columns="image")
+    cifar_ds = cifar_ds.map(operations=normalize_op, input_columns="image")
+    cifar_ds = cifar_ds.map(operations=channel_swap_op, input_columns="image")

     cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
     cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
diff --git a/model_zoo/official/cv/deeplabv3/src/data/data_generator.py b/model_zoo/official/cv/deeplabv3/src/data/data_generator.py
index c0ce3106d3..8e58d46ded 100644
--- a/model_zoo/official/cv/deeplabv3/src/data/data_generator.py
+++ b/model_zoo/official/cv/deeplabv3/src/data/data_generator.py
@@ -84,8 +84,9 @@ class SegDataset:
                                    shuffle=True, num_parallel_workers=self.num_readers,
                                    num_shards=self.shard_num, shard_id=self.shard_id)
         transforms_list = self.preprocess_
-        data_set = data_set.map(input_columns=["data", "label"], output_columns=["data", "label"],
-                                operations=transforms_list, num_parallel_workers=self.num_parallel_calls)
+        data_set = data_set.map(operations=transforms_list, input_columns=["data", "label"],
+                                output_columns=["data", "label"],
+                                num_parallel_workers=self.num_parallel_calls)
         data_set = data_set.shuffle(buffer_size=self.batch_size * 10)
         data_set = data_set.batch(self.batch_size, drop_remainder=True)
         data_set = data_set.repeat(repeat)
diff --git a/model_zoo/official/cv/faster_rcnn/src/dataset.py b/model_zoo/official/cv/faster_rcnn/src/dataset.py
index fd221ceddc..150c5a15bd 100644
--- a/model_zoo/official/cv/faster_rcnn/src/dataset.py
+++ b/model_zoo/official/cv/faster_rcnn/src/dataset.py
@@ -74,8 +74,10 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
         ious = ious.T

     return ious

+
 class PhotoMetricDistortion:
     """Photo Metric Distortion"""
+
     def __init__(self,
                  brightness_delta=32,
                  contrast_range=(0.5, 1.5),
@@ -134,8 +136,10 @@ class PhotoMetricDistortion:

         return img, boxes, labels

+
 class Expand:
     """expand image"""
+
     def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
         if to_rgb:
             self.mean = mean[::-1]
@@ -158,12 +162,13 @@ class Expand:
         boxes += np.tile((left, top), 2)
         return img, boxes, labels

+
 def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     """rescale operation for image"""
     img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
     if img_data.shape[0] > config.img_height:
         img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_width), return_scale=True)
-        scale_factor = scale_factor*scale_factor2
+        scale_factor = scale_factor * scale_factor2
     img_shape = np.append(img_shape, scale_factor)
     img_shape = np.asarray(img_shape, dtype=np.float32)
     gt_bboxes = gt_bboxes * scale_factor
@@ -171,7 +176,8 @@ def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
     gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)

-    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
+    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
+

 def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     """resize operation for image"""
@@ -188,7 +194,8 @@ def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
     gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)

-    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
+    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
+

 def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
     """resize operation for image of eval"""
@@ -205,7 +212,8 @@ def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
     gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
     gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)

-    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
+    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
+

 def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     """impad operation for image"""
@@ -213,12 +221,14 @@ def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     img_data = img_data.astype(np.float32)
     return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
 def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     """imnormalize operation for image"""
     img_data = mmcv.imnormalize(img, [123.675, 116.28, 103.53], [58.395, 57.12, 57.375], True)
     img_data = img_data.astype(np.float32)
     return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
 def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     """flip operation for image"""
     img_data = img
@@ -229,7 +239,8 @@ def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1
     flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1

-    return (img_data, img_shape, flipped, gt_label, gt_num)
+    return (img_data, img_shape, flipped, gt_label, gt_num)
+

 def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num):
     """flipped generation"""
@@ -240,11 +251,13 @@ def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num):
     flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1
     flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1

-    return (img_data, img_shape, flipped, gt_label, gt_num)
+    return (img_data, img_shape, flipped, gt_label, gt_num)
+

 def image_bgr_rgb(img, img_shape, gt_bboxes, gt_label, gt_num):
     img_data = img[:, :, ::-1]
-    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
+    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
+

 def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     """transpose operation for image"""
@@ -257,6 +270,7 @@ def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):

     return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
 def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     """photo crop operation for image"""
     random_photo = PhotoMetricDistortion()
@@ -264,6 +278,7 @@ def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):

     return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
 def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     """expand operation for image"""
     expand = Expand()
@@ -271,8 +286,10 @@ def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):

     return (img, img_shape, gt_bboxes, gt_label, gt_num)

+
 def preprocess_fn(image, box, is_training):
     """Preprocess function for dataset."""
+
     def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert):
         image_shape = image_shape[:2]
         input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert
@@ -325,6 +342,7 @@ def preprocess_fn(image, box, is_training):

     return _data_aug(image, box, is_training)

+
 def create_coco_label(is_training):
     """Get image path and annotation from COCO."""
     from pycocotools.coco import COCO
@@ -334,7 +352,7 @@ def create_coco_label(is_training):
     if is_training:
         data_type = config.train_data_type

-    #Classes need to train or test.
+    # Classes need to train or test.
     train_cls = config.coco_classes
     train_cls_dict = {}
     for i, cls in enumerate(train_cls):
@@ -375,6 +393,7 @@ def create_coco_label(is_training):

     return image_files, image_anno_dict

+
 def anno_parser(annos_str):
     """Parse annotation from string to list."""
     annos = []
@@ -383,6 +402,7 @@ def anno_parser(annos_str):
         annos.append(anno)
     return annos

+
 def filter_valid_data(image_dir, anno_path):
     """Filter valid image file, which both in image_dir and anno_path."""
     image_files = []
@@ -404,6 +424,7 @@ def filter_valid_data(image_dir, anno_path):
             image_files.append(image_path)
     return image_files, image_anno_dict

+
 def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8):
     """Create MindRecord file."""
     mindrecord_dir = config.mindrecord_dir
@@ -435,7 +456,7 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
     ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num,
                         shard_id=rank_id, num_parallel_workers=1, shuffle=is_training)
     decode = C.Decode()
-    ds = ds.map(input_columns=["image"], operations=decode, num_parallel_workers=1)
+    ds = ds.map(operations=decode, input_columns=["image"], num_parallel_workers=1)
     compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))

     hwc_to_chw = C.HWC2CHW()
@@ -447,38 +468,39 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
     type_cast3 = CC.TypeCast(mstype.bool_)

     if is_training:
-        ds = ds.map(input_columns=["image", "annotation"],
+        ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
                     output_columns=["image", "image_shape", "box", "label", "valid_num"],
                     column_order=["image", "image_shape", "box", "label", "valid_num"],
-                    operations=compose_map_func, num_parallel_workers=num_parallel_workers)
+                    num_parallel_workers=num_parallel_workers)

         flip = (np.random.rand() < config.flip_ratio)
         if flip:
-            ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0, horizontally_op],
+            ds = ds.map(operations=[normalize_op, type_cast0, horizontally_op], input_columns=["image"],
                         num_parallel_workers=12)
-            ds = ds.map(input_columns=["image", "image_shape", "box", "label", "valid_num"],
-                        operations=flipped_generation, num_parallel_workers=num_parallel_workers)
+            ds = ds.map(operations=flipped_generation,
+                        input_columns=["image", "image_shape", "box", "label", "valid_num"],
+                        num_parallel_workers=num_parallel_workers)
         else:
-            ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0],
+            ds = ds.map(operations=[normalize_op, type_cast0], input_columns=["image"],
                         num_parallel_workers=12)
-        ds = ds.map(input_columns=["image"], operations=[hwc_to_chw, type_cast1],
+        ds = ds.map(operations=[hwc_to_chw, type_cast1], input_columns=["image"],
                     num_parallel_workers=12)
     else:
-        ds = ds.map(input_columns=["image", "annotation"],
+        ds = ds.map(operations=compose_map_func,
+                    input_columns=["image", "annotation"],
                     output_columns=["image", "image_shape", "box", "label", "valid_num"],
                     column_order=["image", "image_shape", "box", "label", "valid_num"],
-                    operations=compose_map_func, num_parallel_workers=num_parallel_workers)
-        ds = ds.map(input_columns=["image"], operations=[normalize_op, hwc_to_chw, type_cast1],
+                    num_parallel_workers=num_parallel_workers)
+        ds = ds.map(operations=[normalize_op, hwc_to_chw, type_cast1], input_columns=["image"],
                     num_parallel_workers=24)
     # transpose_column from python to c
-    ds = ds.map(input_columns=["image_shape"], operations=[type_cast1])
-    ds = ds.map(input_columns=["box"], operations=[type_cast1])
-    ds = ds.map(input_columns=["label"], operations=[type_cast2])
-    ds = ds.map(input_columns=["valid_num"], operations=[type_cast3])
+    ds = ds.map(operations=[type_cast1], input_columns=["image_shape"])
+    ds = ds.map(operations=[type_cast1], input_columns=["box"])
+    ds = ds.map(operations=[type_cast2], input_columns=["label"])
+    ds = ds.map(operations=[type_cast3], input_columns=["valid_num"])
     ds = ds.batch(batch_size, drop_remainder=True)
     ds = ds.repeat(repeat_num)
diff --git a/model_zoo/official/cv/googlenet/src/dataset.py b/model_zoo/official/cv/googlenet/src/dataset.py
index 7c1553a15a..e8872303a9 100644
--- a/model_zoo/official/cv/googlenet/src/dataset.py
+++ b/model_zoo/official/cv/googlenet/src/dataset.py
@@ -55,8 +55,8 @@ def create_dataset_cifar10(data_home, repeat_num=1, training=True):
         c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

     # apply map operations on images
-    data_set = data_set.map(input_columns="label", operations=type_cast_op)
-    data_set = data_set.map(input_columns="image", operations=c_trans)
+    data_set = data_set.map(operations=type_cast_op, input_columns="label")
+    data_set = data_set.map(operations=c_trans, input_columns="image")

     # apply batch operations
     data_set = data_set.batch(batch_size=cifar_cfg.batch_size, drop_remainder=True)
diff --git a/model_zoo/official/cv/inceptionv3/src/dataset.py b/model_zoo/official/cv/inceptionv3/src/dataset.py
index 6ed7175b47..860d93f215 100644
--- a/model_zoo/official/cv/inceptionv3/src/dataset.py
+++ b/model_zoo/official/cv/inceptionv3/src/dataset.py
@@ -60,8 +60,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
         C.HWC2CHW()
     ]
     type_cast_op = C2.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums)
-    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums)
     # apply batch operations
     ds = ds.batch(cfg.batch_size, drop_remainder=True)
     # apply dataset repeat operation
diff --git a/model_zoo/official/cv/lenet/src/dataset.py b/model_zoo/official/cv/lenet/src/dataset.py
index dca3a6af8a..df9eecda1f 100644
--- a/model_zoo/official/cv/lenet/src/dataset.py
+++ b/model_zoo/official/cv/lenet/src/dataset.py
@@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
     type_cast_op = C.TypeCast(mstype.int32)

     # apply map operations on images
-    mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)

     # apply DatasetOps
     buffer_size = 10000
diff --git a/model_zoo/official/cv/lenet_quant/src/dataset.py b/model_zoo/official/cv/lenet_quant/src/dataset.py
index dca3a6af8a..df9eecda1f 100644
--- a/model_zoo/official/cv/lenet_quant/src/dataset.py
+++ b/model_zoo/official/cv/lenet_quant/src/dataset.py
@@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
     type_cast_op = C.TypeCast(mstype.int32)

     # apply map operations on images
-    mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)

     # apply DatasetOps
     buffer_size = 10000
diff --git a/model_zoo/official/cv/maskrcnn/src/dataset.py b/model_zoo/official/cv/maskrcnn/src/dataset.py
index 090e32727f..f685995585 100644
--- a/model_zoo/official/cv/maskrcnn/src/dataset.py
+++ b/model_zoo/official/cv/maskrcnn/src/dataset.py
@@ -498,24 +498,24 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id
                         num_parallel_workers=4, shuffle=is_training)
     decode = C.Decode()
-    ds = ds.map(input_columns=["image"], operations=decode)
+    ds = ds.map(operations=decode, input_columns=["image"])
     compose_map_func = (lambda image, annotation, mask, mask_shape:
                         preprocess_fn(image, annotation, mask, mask_shape, is_training))

     if is_training:
-        ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
+        ds = ds.map(operations=compose_map_func,
+                    input_columns=["image", "annotation", "mask", "mask_shape"],
                     output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
                     column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
-                    operations=compose_map_func,
                     python_multiprocessing=False,
                     num_parallel_workers=num_parallel_workers)
         ds = ds.batch(batch_size, drop_remainder=True)

     else:
-        ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
+        ds = ds.map(operations=compose_map_func,
+                    input_columns=["image", "annotation", "mask", "mask_shape"],
                     output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
                     column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
-                    operations=compose_map_func,
                     num_parallel_workers=num_parallel_workers)
         ds = ds.batch(batch_size, drop_remainder=True)
diff --git a/model_zoo/official/cv/mobilenetv2/src/dataset.py b/model_zoo/official/cv/mobilenetv2/src/dataset.py
index 356bd69d23..dab64fd33d 100644
--- a/model_zoo/official/cv/mobilenetv2/src/dataset.py
+++ b/model_zoo/official/cv/mobilenetv2/src/dataset.py
@@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, config, repeat_num=1):

     type_cast_op = C2.TypeCast(mstype.int32)

-    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
-    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

     # apply shuffle operations
     ds = ds.shuffle(buffer_size=buffer_size)
diff --git a/model_zoo/official/cv/mobilenetv2_quant/src/dataset.py b/model_zoo/official/cv/mobilenetv2_quant/src/dataset.py
index 431181a969..8c35014dce 100644
--- a/model_zoo/official/cv/mobilenetv2_quant/src/dataset.py
+++ b/model_zoo/official/cv/mobilenetv2_quant/src/dataset.py
@@ -89,8 +89,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,

     type_cast_op = C2.TypeCast(mstype.int32)

-    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=16)
-    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=16)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -152,7 +152,7 @@ def create_dataset_py(dataset_path, do_train, config, device_target, repeat_num=

     compose = P2.Compose(trans)

-    ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
+    ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True)

     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
diff --git a/model_zoo/official/cv/mobilenetv3/src/dataset.py b/model_zoo/official/cv/mobilenetv3/src/dataset.py
index 40c99623a9..c140a7fdbc 100644
--- a/model_zoo/official/cv/mobilenetv3/src/dataset.py
+++ b/model_zoo/official/cv/mobilenetv3/src/dataset.py
@@ -70,8 +70,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,

     type_cast_op = C2.TypeCast(mstype.int32)

-    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
-    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

     # apply shuffle operations
     ds = ds.shuffle(buffer_size=buffer_size)
diff --git a/model_zoo/official/cv/nasnet/src/dataset.py b/model_zoo/official/cv/nasnet/src/dataset.py
index 0b342ba098..743ab2a774 100755
--- a/model_zoo/official/cv/nasnet/src/dataset.py
+++ b/model_zoo/official/cv/nasnet/src/dataset.py
@@ -61,8 +61,8 @@ def create_dataset(dataset_path, config, do_train, repeat_num=1):
         C.HWC2CHW()
     ]
     type_cast_op = C2.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=config.work_nums)
-    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=config.work_nums)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=config.work_nums)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=config.work_nums)
     # apply batch operations
     ds = ds.batch(config.batch_size, drop_remainder=True)
     # apply dataset repeat operation
diff --git a/model_zoo/official/cv/resnet/src/dataset.py b/model_zoo/official/cv/resnet/src/dataset.py
index c9252834f2..cfc503a80f 100755
--- a/model_zoo/official/cv/resnet/src/dataset.py
+++ b/model_zoo/official/cv/resnet/src/dataset.py
@@ -22,6 +22,7 @@ import mindspore.dataset.vision.c_transforms as C
 import mindspore.dataset.transforms.c_transforms as C2
 from mindspore.communication.management import init, get_rank, get_group_size

+
 def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
     """
     create a train or evaluate cifar10 dataset for resnet50
@@ -65,8 +66,8 @@ def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target=

     type_cast_op = C2.TypeCast(mstype.int32)

-    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
-    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)

     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -126,8 +127,8 @@ def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target=

     type_cast_op = C2.TypeCast(mstype.int32)

-    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
-    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -165,7 +166,7 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=
     if do_train:
         trans = [
             C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
-            C.RandomHorizontalFlip(rank_id/ (rank_id +1)),
+            C.RandomHorizontalFlip(rank_id / (rank_id + 1)),
             C.Normalize(mean=mean, std=std),
             C.HWC2CHW()
         ]
@@ -180,8 +181,8 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=

     type_cast_op = C2.TypeCast(mstype.int32)

-    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
-    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -190,6 +191,7 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=

     return ds

+
 def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
     """
     create a train or eval imagenet2012 dataset for se-resnet50
@@ -233,8 +235,8 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target=
     ]
     type_cast_op = C2.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="image", num_parallel_workers=12, operations=trans)
-    ds = ds.map(input_columns="label", num_parallel_workers=12, operations=type_cast_op)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=12)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)

     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -244,6 +246,7 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target=

     return ds

+
 def _get_rank_info():
     """
     get rank size and rank id
diff --git a/model_zoo/official/cv/resnet50_quant/src/dataset.py b/model_zoo/official/cv/resnet50_quant/src/dataset.py
index 605543ec12..688104f41f 100755
--- a/model_zoo/official/cv/resnet50_quant/src/dataset.py
+++ b/model_zoo/official/cv/resnet50_quant/src/dataset.py
@@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="

     type_cast_op = C2.TypeCast(mstype.int32)

-    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
-    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -147,7 +147,7 @@ def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, targe
     trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]

     compose = P2.Compose(trans)
-    ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
+    ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True)

     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
diff --git a/model_zoo/official/cv/resnet_thor/src/dataset.py b/model_zoo/official/cv/resnet_thor/src/dataset.py
index 69ecf19c46..f00585d23b 100644
--- a/model_zoo/official/cv/resnet_thor/src/dataset.py
+++ b/model_zoo/official/cv/resnet_thor/src/dataset.py
@@ -75,8 +75,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="

     type_cast_op = C2.TypeCast(mstype.int32)

-    ds = ds.map(input_columns="image", num_parallel_workers=num_parallels, operations=trans)
-    ds = ds.map(input_columns="label", num_parallel_workers=num_parallels, operations=type_cast_op)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=num_parallels)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallels)

     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
diff --git a/model_zoo/official/cv/resnext50/src/dataset.py b/model_zoo/official/cv/resnext50/src/dataset.py
index 2132ccd13a..0176ffa082 100644
--- a/model_zoo/official/cv/resnext50/src/dataset.py
+++ b/model_zoo/official/cv/resnext50/src/dataset.py
@@ -25,6 +25,7 @@ from src.utils.sampler import DistributedSampler

 ImageFile.LOAD_TRUNCATED_IMAGES = True

+
 class TxtDataset():
     """
     create txt dataset.
@@ -33,6 +34,7 @@ class TxtDataset():
     Returns:
         de_dataset.
     """
+
     def __init__(self, root, txt_name):
         super(TxtDataset, self).__init__()
         self.imgs = []
@@ -142,10 +144,10 @@ def classification_dataset(data_dir, image_size, per_batch_size, max_epoch, rank
         sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
         de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)

-    de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=num_parallel_workers,
-                                operations=transform_img)
-    de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=num_parallel_workers,
-                                operations=transform_label)
+    de_dataset = de_dataset.map(operations=transform_img, input_columns="image",
+                                num_parallel_workers=num_parallel_workers)
+    de_dataset = de_dataset.map(operations=transform_label, input_columns="label",
+                                num_parallel_workers=num_parallel_workers)

     columns_to_project = ["image", "label"]
     de_dataset = de_dataset.project(columns=columns_to_project)
diff --git a/model_zoo/official/cv/shufflenetv2/src/dataset.py b/model_zoo/official/cv/shufflenetv2/src/dataset.py
index 10f5a80664..c04bed7d0f 100644
--- a/model_zoo/official/cv/shufflenetv2/src/dataset.py
+++ b/model_zoo/official/cv/shufflenetv2/src/dataset.py
@@ -72,8 +72,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
     ]
     type_cast_op = C2.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums)
-    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums)

     # apply batch operations
     ds = ds.batch(cfg.batch_size, drop_remainder=True)
diff --git a/model_zoo/official/cv/ssd/src/dataset.py b/model_zoo/official/cv/ssd/src/dataset.py
index 4ff66d30db..20030475a0 100644
--- a/model_zoo/official/cv/ssd/src/dataset.py
+++ b/model_zoo/official/cv/ssd/src/dataset.py
@@ -34,13 +34,15 @@ def _rand(a=0., b=1.):
     """Generate random."""
     return np.random.rand() * (b - a) + a

+
 def get_imageId_from_fileName(filename):
     """Get imageID from fileName"""
     try:
         filename = os.path.splitext(filename)[0]
         return int(filename)
     except:
-        raise NotImplementedError('Filename %s is supposed to be an integer.'%(filename))
+        raise NotImplementedError('Filename %s is supposed to be an integer.' % (filename))
+

 def random_sample_crop(image, boxes):
     """Random Crop the image and boxes"""
@@ -64,7 +66,7 @@ def random_sample_crop(image, boxes):
         left = _rand() * (width - w)
         top = _rand() * (height - h)

-        rect = np.array([int(top), int(left), int(top+h), int(left+w)])
+        rect = np.array([int(top), int(left), int(top + h), int(left + w)])
         overlap = jaccard_numpy(boxes, rect)

         # dropout some boxes
@@ -103,13 +105,14 @@ def random_sample_crop(image, boxes):

 def preprocess_fn(img_id, image, box, is_training):
     """Preprocess function for dataset."""
+
     def _infer_data(image, input_shape):
         img_h, img_w, _ = image.shape
         input_h, input_w = input_shape

         image = cv2.resize(image, (input_w, input_h))

-        #When the channels of image is 1
+        # When the channels of image is 1
         if len(image.shape) == 2:
             image = np.expand_dims(image, axis=-1)
             image = np.concatenate([image, image, image], axis=-1)
@@ -150,6 +153,7 @@ def preprocess_fn(img_id, image, box, is_training):
             box, label, num_match = ssd_bboxes_encode(box)
         return image, box, label, num_match

+
     return _data_aug(image, box, is_training, image_size=config.img_shape)
@@ -158,7 +162,7 @@ def create_voc_label(is_training):
     voc_dir = config.voc_dir
     cls_map = {name: i for i, name in enumerate(config.coco_classes)}
     sub_dir = 'train' if is_training else 'eval'
-    #sub_dir = 'train'
+    # sub_dir = 'train'
     voc_dir = os.path.join(voc_dir, sub_dir)
     if not os.path.isdir(voc_dir):
         raise ValueError(f'Cannot find {sub_dir} dataset path.')
@@ -244,6 +248,7 @@ def create_voc_label(is_training):

     return images, image_files_dict, image_anno_dict

+
 def create_coco_label(is_training):
     """Get image path and annotation from COCO."""
     from pycocotools.coco import COCO
@@ -253,7 +258,7 @@ def create_coco_label(is_training):
     if is_training:
         data_type = config.train_data_type

-    #Classes need to train or test.
+    # Classes need to train or test.
     train_cls = config.coco_classes
     train_cls_dict = {}
     for i, cls in enumerate(train_cls):
@@ -391,9 +396,10 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num
     ds = de.MindDataset(mindrecord_file, columns_list=["img_id", "image", "annotation"], num_shards=device_num,
                         shard_id=rank, num_parallel_workers=num_parallel_workers, shuffle=is_training)
     decode = C.Decode()
-    ds = ds.map(input_columns=["image"], operations=decode)
+    ds = ds.map(operations=decode, input_columns=["image"])
     change_swap_op = C.HWC2CHW()
-    normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255])
+    normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
+                               std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
     color_adjust_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
     compose_map_func = (lambda img_id, image, annotation: preprocess_fn(img_id, image, annotation, is_training))
     if is_training:
@@ -402,11 +408,11 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num
     else:
         output_columns = ["img_id", "image", "image_shape"]
         trans = [normalize_op, change_swap_op]
-    ds = ds.map(input_columns=["img_id", "image", "annotation"],
+    ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"],
                 output_columns=output_columns, column_order=output_columns,
-                operations=compose_map_func, python_multiprocessing=is_training,
+                python_multiprocessing=is_training,
                 num_parallel_workers=num_parallel_workers)
-    ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training,
+    ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=is_training,
                 num_parallel_workers=num_parallel_workers)
     ds = ds.batch(batch_size, drop_remainder=True)
     ds = ds.repeat(repeat_num)
diff --git a/model_zoo/official/cv/vgg16/src/dataset.py b/model_zoo/official/cv/vgg16/src/dataset.py
index 5b05c7cc02..e87947e4bd 100644
--- a/model_zoo/official/cv/vgg16/src/dataset.py
+++ b/model_zoo/official/cv/vgg16/src/dataset.py
@@ -54,8 +54,8 @@ def vgg_create_dataset(data_home, image_size, batch_size, rank_id=0, rank_size=1
                changeswap_op]

     # apply map operations on images
-    data_set = data_set.map(input_columns="label", operations=type_cast_op)
-    data_set = data_set.map(input_columns="image", operations=c_trans)
+    data_set = data_set.map(operations=type_cast_op, input_columns="label")
+    data_set = data_set.map(operations=c_trans, input_columns="image")

     # apply repeat operations
     data_set = data_set.repeat(repeat_num)
@@ -157,8 +157,8 @@ def classification_dataset(data_dir, image_size, per_batch_size, rank=0, group_s
         sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
         de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)

-    de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=8, operations=transform_img)
-    de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=8, operations=transform_label)
+    de_dataset = de_dataset.map(operations=transform_img, input_columns="image", num_parallel_workers=8)
+    de_dataset = de_dataset.map(operations=transform_label, input_columns="label", num_parallel_workers=8)

     columns_to_project = ["image", "label"]
     de_dataset = de_dataset.project(columns=columns_to_project)
diff --git a/model_zoo/official/cv/warpctc/src/dataset.py b/model_zoo/official/cv/warpctc/src/dataset.py
index 5d6c045840..be18a373cf 100755
--- a/model_zoo/official/cv/warpctc/src/dataset.py
+++ b/model_zoo/official/cv/warpctc/src/dataset.py
@@ -90,8 +90,8 @@ def create_dataset(dataset_path, batch_size=1, num_shards=1, shard_id=0, device_
     label_trans = [
         c.TypeCast(mstype.int32)
     ]
-    ds = ds.map(input_columns=["image"], num_parallel_workers=8, operations=image_trans)
-    ds = ds.map(input_columns=["label"], num_parallel_workers=8, operations=label_trans)
+    ds = ds.map(operations=image_trans, input_columns=["image"], num_parallel_workers=8)
+    ds = ds.map(operations=label_trans, input_columns=["label"], num_parallel_workers=8)
     ds = ds.batch(batch_size, drop_remainder=True)

     return ds
diff --git a/model_zoo/official/cv/yolov3_darknet53/src/yolo_dataset.py b/model_zoo/official/cv/yolov3_darknet53/src/yolo_dataset.py
index b56a1fc403..84e6bd49a1 100644
--- a/model_zoo/official/cv/yolov3_darknet53/src/yolo_dataset.py
+++ b/model_zoo/official/cv/yolov3_darknet53/src/yolo_dataset.py
@@ -176,11 +176,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
         ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"],
                                  sampler=distributed_sampler)
         compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
-        ds = ds.map(input_columns=["image", "img_id"],
+        ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
                     output_columns=["image", "image_shape", "img_id"],
                     column_order=["image", "image_shape", "img_id"],
-                    operations=compose_map_func, num_parallel_workers=8)
-        ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
+                    num_parallel_workers=8)
+        ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
         ds = ds.batch(batch_size, drop_remainder=True)
         ds = ds.repeat(max_epoch)
diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo_dataset.py b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo_dataset.py
index 59b423ad98..b8e9609359 100644
--- a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo_dataset.py
+++ b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo_dataset.py
@@ -173,11 +173,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
         ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"],
                                  sampler=distributed_sampler)
         compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
-        ds = ds.map(input_columns=["image", "img_id"],
+        ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
                     output_columns=["image", "image_shape", "img_id"],
                     column_order=["image", "image_shape", "img_id"],
-                    operations=compose_map_func, num_parallel_workers=8)
-        ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
+                    num_parallel_workers=8)
+        ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
         ds = ds.batch(batch_size, drop_remainder=True)
         ds = ds.repeat(max_epoch)
diff --git a/model_zoo/official/cv/yolov3_resnet18/src/dataset.py b/model_zoo/official/cv/yolov3_resnet18/src/dataset.py
index 36ef978b08..48c6c13ae6 100644
--- a/model_zoo/official/cv/yolov3_resnet18/src/dataset.py
+++ b/model_zoo/official/cv/yolov3_resnet18/src/dataset.py
@@ -296,21 +296,21 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=1, device_num=
     ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank,
                         num_parallel_workers=num_parallel_workers, shuffle=is_training)
     decode = C.Decode()
-    ds = ds.map(input_columns=["image"], operations=decode)
+    ds = ds.map(operations=decode, input_columns=["image"])
     compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))

     if is_training:
         hwc_to_chw = C.HWC2CHW()
-        ds = ds.map(input_columns=["image", "annotation"],
+        ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
                     output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
                     column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
-                    operations=compose_map_func, num_parallel_workers=num_parallel_workers)
-        ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers)
+                    num_parallel_workers=num_parallel_workers)
+        ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers)
         ds = ds.batch(batch_size, drop_remainder=True)
         ds = ds.repeat(repeat_num)
     else:
-        ds = ds.map(input_columns=["image", "annotation"],
+        ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
                     output_columns=["image", "image_shape", "annotation"],
                     column_order=["image", "image_shape", "annotation"],
-                    operations=compose_map_func, num_parallel_workers=num_parallel_workers)
+                    num_parallel_workers=num_parallel_workers)
     return ds
diff --git a/model_zoo/official/nlp/bert/pretrain_eval.py b/model_zoo/official/nlp/bert/pretrain_eval.py
index a10dc28bc5..d2924854d7 100644
--- a/model_zoo/official/nlp/bert/pretrain_eval.py
+++ b/model_zoo/official/nlp/bert/pretrain_eval.py
@@ -112,12 +112,12 @@ def get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''):
                                       "masked_lm_weights",
                                       "next_sentence_labels"])
     type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
+    ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
     ds = ds.repeat(repeat_count)

     # apply batch operations
diff --git a/model_zoo/official/nlp/bert/src/clue_classification_dataset_process.py b/model_zoo/official/nlp/bert/src/clue_classification_dataset_process.py
index 0d2201cfe6..8d92447eab 100755
--- a/model_zoo/official/nlp/bert/src/clue_classification_dataset_process.py
+++ b/model_zoo/official/nlp/bert/src/clue_classification_dataset_process.py
@@ -42,30 +42,31 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
                              usage=data_usage, shuffle=shuffle_dataset)
     ### Processing label
     if data_usage == 'test':
-        dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"],
-                              column_order=["id", "label_id", "sentence"], operations=ops.Duplicate())
-        dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0))
+        dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"],
+                              column_order=["id", "label_id", "sentence"])
+        dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"])
     else:
         label_vocab = text.Vocab.from_list(label_list)
         label_lookup = text.Lookup(label_vocab)
-        dataset = dataset.map(input_columns="label_desc", output_columns="label_id", operations=label_lookup)
+        dataset = dataset.map(operations=label_lookup, input_columns="label_desc", output_columns="label_id")
     ### Processing sentence
     vocab = text.Vocab.from_file(bert_vocab_path)
     tokenizer = text.BertTokenizer(vocab, lower_case=True)
     lookup = text.Lookup(vocab, unknown_token='[UNK]')
-    dataset = dataset.map(input_columns=["sentence"], operations=tokenizer)
-    dataset = dataset.map(input_columns=["sentence"], operations=ops.Slice(slice(0, max_seq_len)))
-    dataset = dataset.map(input_columns=["sentence"],
-                          operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
-                                                     append=np.array(["[SEP]"], dtype='S')))
-    dataset = dataset.map(input_columns=["sentence"], output_columns=["text_ids"], operations=lookup)
-    dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0))
-    dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"],
-                          column_order=["text_ids", "mask_ids", "label_id"], operations=ops.Duplicate())
-    dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32))
-    dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "segment_ids"],
-                          column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate())
-    dataset = dataset.map(input_columns=["segment_ids"], operations=ops.Fill(0))
+    dataset = dataset.map(operations=tokenizer, input_columns=["sentence"])
+    dataset = dataset.map(operations=ops.Slice(slice(0, max_seq_len)), input_columns=["sentence"])
+    dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
+                                                     append=np.array(["[SEP]"], dtype='S')), input_columns=["sentence"])
+    dataset = dataset.map(operations=lookup, input_columns=["sentence"], output_columns=["text_ids"])
+    dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"])
+    dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
+                          output_columns=["text_ids", "mask_ids"],
+                          column_order=["text_ids", "mask_ids", "label_id"])
+    dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"])
+    dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
+                          output_columns=["text_ids", "segment_ids"],
+                          column_order=["text_ids", "mask_ids", "segment_ids", "label_id"])
+    dataset = dataset.map(operations=ops.Fill(0), input_columns=["segment_ids"])
     dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
     return dataset
@@ -86,50 +87,51 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
                              usage=data_usage, shuffle=shuffle_dataset)
     ### Processing label
     if data_usage == 'test':
-        dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"],
-                              column_order=["id", "label_id", "sentence1", "sentence2"], operations=ops.Duplicate())
-        dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0))
+        dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"],
+                              column_order=["id", "label_id", "sentence1", "sentence2"])
+        dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"])
     else:
         label_vocab = text.Vocab.from_list(label_list)
         label_lookup = text.Lookup(label_vocab)
-        dataset = dataset.map(input_columns="label", output_columns="label_id", operations=label_lookup)
+        dataset = dataset.map(operations=label_lookup, input_columns="label", output_columns="label_id")
     ### Processing sentence pairs
     vocab = text.Vocab.from_file(bert_vocab_path)
     tokenizer = text.BertTokenizer(vocab, lower_case=True)
     lookup = text.Lookup(vocab, unknown_token='[UNK]')
     ### Tokenizing sentences and truncate sequence pair
-    dataset = dataset.map(input_columns=["sentence1"], operations=tokenizer)
-    dataset = dataset.map(input_columns=["sentence2"], operations=tokenizer)
-    dataset = dataset.map(input_columns=["sentence1", "sentence2"],
-                          operations=text.TruncateSequencePair(max_seq_len-3))
+    dataset = dataset.map(operations=tokenizer, input_columns=["sentence1"])
+    dataset = dataset.map(operations=tokenizer, input_columns=["sentence2"])
+    dataset = dataset.map(operations=text.TruncateSequencePair(max_seq_len - 3),
+                          input_columns=["sentence1", "sentence2"])
     ### Adding special tokens
-    dataset = dataset.map(input_columns=["sentence1"],
-                          operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
-                                                     append=np.array(["[SEP]"], dtype='S')))
-    dataset = dataset.map(input_columns=["sentence2"],
-                          operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S')))
+    dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
+                                                     append=np.array(["[SEP]"], dtype='S')),
+                          input_columns=["sentence1"])
+    dataset = dataset.map(operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S')),
+                          input_columns=["sentence2"])
     ### Generating segment_ids
-    dataset = dataset.map(input_columns=["sentence1"], output_columns=["sentence1", "type_sentence1"],
-                          column_order=["sentence1", "type_sentence1", "sentence2", "label_id"],
-                          operations=ops.Duplicate())
-    dataset = dataset.map(input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"],
-                          column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"],
-                          operations=ops.Duplicate())
-    dataset = dataset.map(input_columns=["type_sentence1"], operations=[lookup, ops.Fill(0)])
-    dataset = dataset.map(input_columns=["type_sentence2"], operations=[lookup, ops.Fill(1)])
-    dataset = dataset.map(input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"],
-                          column_order=["sentence1", "sentence2", "segment_ids", "label_id"],
-                          operations=ops.Concatenate())
-    dataset = dataset.map(input_columns=["segment_ids"], operations=ops.PadEnd([max_seq_len], 0))
+    dataset = dataset.map(operations=ops.Duplicate(), input_columns=["sentence1"],
+                          output_columns=["sentence1", "type_sentence1"],
+                          column_order=["sentence1", "type_sentence1", "sentence2", "label_id"])
+    dataset = dataset.map(operations=ops.Duplicate(),
+                          input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"],
+                          column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"])
+    dataset = dataset.map(operations=[lookup, ops.Fill(0)], input_columns=["type_sentence1"])
+    dataset = dataset.map(operations=[lookup, ops.Fill(1)], input_columns=["type_sentence2"])
+    dataset = dataset.map(operations=ops.Concatenate(),
+                          input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"],
+                          column_order=["sentence1", "sentence2", "segment_ids", "label_id"])
+    dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["segment_ids"])
     ### Generating text_ids
-    dataset = dataset.map(input_columns=["sentence1", "sentence2"], output_columns=["text_ids"],
-                          column_order=["text_ids", "segment_ids", "label_id"],
-                          operations=ops.Concatenate())
-    dataset = dataset.map(input_columns=["text_ids"], operations=lookup)
-    dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0))
+    dataset = dataset.map(operations=ops.Concatenate(),
+                          input_columns=["sentence1", "sentence2"], output_columns=["text_ids"],
+                          column_order=["text_ids", "segment_ids", "label_id"])
+    dataset = dataset.map(operations=lookup, input_columns=["text_ids"])
+    dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"])
     ### Generating mask_ids
-    dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"],
-                          column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate())
-    dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32))
+    dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
+                          output_columns=["text_ids", "mask_ids"],
+                          column_order=["text_ids", "mask_ids", "segment_ids", "label_id"])
+    dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"])
     dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
     return dataset
diff --git a/model_zoo/official/nlp/bert/src/dataset.py b/model_zoo/official/nlp/bert/src/dataset.py
index 28c2ef0149..868dab5bca 100644
--- a/model_zoo/official/nlp/bert/src/dataset.py
+++ b/model_zoo/official/nlp/bert/src/dataset.py
@@ -39,12 +39,12 @@ def create_bert_dataset(device_num=1, rank=0, do_shuffle="true", data_dir=None,
     ori_dataset_size = ds.get_dataset_size()
     print('origin dataset size: ', ori_dataset_size)
     type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
+    ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     # apply batch operations
     ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
     logger.info("data size: {}".format(ds.get_dataset_size()))
@@ -60,12 +60,12 @@ def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy
                             columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle)
     if assessment_method == "Spearman_correlation":
         type_cast_op_float = C.TypeCast(mstype.float32)
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op_float)
+        ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
     else:
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="label_ids")
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     ds = ds.repeat(repeat_count)
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -80,12 +80,12 @@ def create_classification_dataset(batch_size=1, repeat_count=1, assessment_metho
                             columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle)
     if assessment_method == "Spearman_correlation":
         type_cast_op_float = C.TypeCast(mstype.float32)
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op_float)
+        ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
     else:
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="label_ids")
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     ds = ds.repeat(repeat_count)
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -101,14 +101,14 @@ def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, sche
                                 columns_list=["input_ids", "input_mask", "segment_ids", "start_positions",
                                               "end_positions", "unique_ids", "is_impossible"],
                                 shuffle=do_shuffle)
-        ds = ds.map(input_columns="start_positions", operations=type_cast_op)
-        ds = ds.map(input_columns="end_positions", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="start_positions")
+        ds = ds.map(operations=type_cast_op, input_columns="end_positions")
     else:
         ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None,
                                 columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"])
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     ds = ds.repeat(repeat_count)
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
diff --git a/model_zoo/official/nlp/bert_thor/pretrain_eval.py b/model_zoo/official/nlp/bert_thor/pretrain_eval.py
index 4cb501a4a6..e32295d064 100644
--- a/model_zoo/official/nlp/bert_thor/pretrain_eval.py
+++ b/model_zoo/official/nlp/bert_thor/pretrain_eval.py
@@ -117,12 +117,12 @@ def get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''):
                                       "masked_lm_weights",
                                       "next_sentence_labels"])
     type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
+    ds = ds.map(operations=type_cast_op,
input_columns="input_mask") + ds = ds.map(operations=type_cast_op, input_columns="input_ids") + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") + ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") ds = ds.repeat(repeat_count) # apply batch operations diff --git a/model_zoo/official/nlp/bert_thor/src/dataset.py b/model_zoo/official/nlp/bert_thor/src/dataset.py index fee6c97024..705aa362e0 100644 --- a/model_zoo/official/nlp/bert_thor/src/dataset.py +++ b/model_zoo/official/nlp/bert_thor/src/dataset.py @@ -40,12 +40,12 @@ def create_bert_dataset(device_num=1, rank=0, do_shuffle="true", data_dir=None, ori_dataset_size = ds.get_dataset_size() print('origin dataset size: ', ori_dataset_size) type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) - ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) - ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") + ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") + ds = ds.map(operations=type_cast_op, input_columns="segment_ids") + ds = ds.map(operations=type_cast_op, input_columns="input_mask") + ds = ds.map(operations=type_cast_op, input_columns="input_ids") # apply batch operations ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True) logger.info("data size: {}".format(ds.get_dataset_size())) @@ -61,12 +61,12 @@ def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"]) if assessment_method == "Spearman_correlation": type_cast_op_float = C.TypeCast(mstype.float32) - ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) + ds = ds.map(operations=type_cast_op_float, input_columns="label_ids") else: - ds = ds.map(input_columns="label_ids", operations=type_cast_op) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="label_ids") + ds = ds.map(operations=type_cast_op, input_columns="segment_ids") + ds = ds.map(operations=type_cast_op, input_columns="input_mask") + ds = ds.map(operations=type_cast_op, input_columns="input_ids") ds = ds.repeat(repeat_count) # apply shuffle operation buffer_size = 960 @@ -84,12 +84,12 @@ def create_classification_dataset(batch_size=1, repeat_count=1, assessment_metho columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"]) if assessment_method == "Spearman_correlation": type_cast_op_float = C.TypeCast(mstype.float32) - ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) + ds = ds.map(operations=type_cast_op_float, input_columns="label_ids") else: - ds = ds.map(input_columns="label_ids", operations=type_cast_op) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", 
operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="label_ids") + ds = ds.map(operations=type_cast_op, input_columns="segment_ids") + ds = ds.map(operations=type_cast_op, input_columns="input_mask") + ds = ds.map(operations=type_cast_op, input_columns="input_ids") ds = ds.repeat(repeat_count) # apply shuffle operation buffer_size = 960 @@ -107,17 +107,14 @@ def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, sche columns_list=["input_ids", "input_mask", "segment_ids", "start_positions", "end_positions", "unique_ids", "is_impossible"]) - ds = ds.map(input_columns="start_positions", operations=type_cast_op) - ds = ds.map(input_columns="end_positions", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="start_positions") + ds = ds.map(operations=type_cast_op, input_columns="end_positions") else: ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"]) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="input_ids") + ds = ds.map(operations=type_cast_op, input_columns="input_mask") + ds = ds.map(operations=type_cast_op, input_columns="segment_ids") ds = ds.repeat(repeat_count) # apply shuffle operation buffer_size = 960 diff --git a/model_zoo/official/nlp/mass/src/dataset/load_dataset.py b/model_zoo/official/nlp/mass/src/dataset/load_dataset.py index be59941374..d24ce6c49a 100644 --- a/model_zoo/official/nlp/mass/src/dataset/load_dataset.py +++ b/model_zoo/official/nlp/mass/src/dataset/load_dataset.py @@ -60,12 +60,12 @@ def _load_dataset(input_files, batch_size, epoch_count=1, repeat_count = epoch_count type_cast_op = deC.TypeCast(mstype.int32) - ds = ds.map(input_columns="src", operations=type_cast_op) - ds = ds.map(input_columns="src_padding", operations=type_cast_op) - ds = ds.map(input_columns="prev_opt", operations=type_cast_op) - ds = ds.map(input_columns="prev_padding", operations=type_cast_op) - ds = ds.map(input_columns="target", operations=type_cast_op) - ds = ds.map(input_columns="tgt_padding", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="src") + ds = ds.map(operations=type_cast_op, input_columns="src_padding") + ds = ds.map(operations=type_cast_op, input_columns="prev_opt") + ds = ds.map(operations=type_cast_op, input_columns="prev_padding") + ds = ds.map(operations=type_cast_op, input_columns="target") + ds = ds.map(operations=type_cast_op, input_columns="tgt_padding") ds = ds.rename( input_columns=["src", diff --git a/model_zoo/official/nlp/tinybert/src/dataset.py b/model_zoo/official/nlp/tinybert/src/dataset.py index e632e02fe2..5829846043 100644 --- a/model_zoo/official/nlp/tinybert/src/dataset.py +++ b/model_zoo/official/nlp/tinybert/src/dataset.py @@ -49,11 +49,11 @@ def create_tinybert_dataset(task='td', batch_size=32, device_num=1, rank=0, shard_equal_rows=True)
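# Nearly every hunk in this patch makes the same mechanical change: `operations`
# moves in front of `input_columns` in Dataset.map(), with `output_columns` and
# `column_order` (where present) following. A minimal sketch of the two recurring
# call shapes, assuming a toy NumpySlicesDataset; the column names and values are
# illustrative only and are not taken from any of the patched files:
import mindspore.common.dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as ops

data = ds.NumpySlicesDataset({"text_ids": [[1, 2], [3, 4]], "label": [0, 1]}, shuffle=False)
# One-column transform: `operations` is now the leading keyword argument.
data = data.map(operations=ops.TypeCast(mstype.int32), input_columns="label")
# One-to-many transform: Duplicate() emits two output columns, and column_order
# fixes the final column layout of the dataset.
data = data.map(operations=ops.Duplicate(), input_columns=["text_ids"],
                output_columns=["text_ids", "mask_ids"],
                column_order=["text_ids", "mask_ids", "label"])
for row in data.create_dict_iterator():
    print(row["text_ids"], row["mask_ids"], row["label"])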
type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="segment_ids") + ds = ds.map(operations=type_cast_op, input_columns="input_mask") + ds = ds.map(operations=type_cast_op, input_columns="input_ids") if task == "td": - ds = ds.map(input_columns="label_ids", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="label_ids") # apply batch operations ds = ds.batch(batch_size, drop_remainder=True) diff --git a/model_zoo/official/nlp/transformer/eval.py b/model_zoo/official/nlp/transformer/eval.py index 5ced75ba33..865c1307f9 100644 --- a/model_zoo/official/nlp/transformer/eval.py +++ b/model_zoo/official/nlp/transformer/eval.py @@ -40,12 +40,12 @@ def load_test_data(batch_size=1, data_file=None): "target_eos_ids", "target_eos_mask"], shuffle=False) type_cast_op = deC.TypeCast(mstype.int32) - ds = ds.map(input_columns="source_eos_ids", operations=type_cast_op) - ds = ds.map(input_columns="source_eos_mask", operations=type_cast_op) - ds = ds.map(input_columns="target_sos_ids", operations=type_cast_op) - ds = ds.map(input_columns="target_sos_mask", operations=type_cast_op) - ds = ds.map(input_columns="target_eos_ids", operations=type_cast_op) - ds = ds.map(input_columns="target_eos_mask", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="source_eos_ids") + ds = ds.map(operations=type_cast_op, input_columns="source_eos_mask") + ds = ds.map(operations=type_cast_op, input_columns="target_sos_ids") + ds = ds.map(operations=type_cast_op, input_columns="target_sos_mask") + ds = ds.map(operations=type_cast_op, input_columns="target_eos_ids") + ds = ds.map(operations=type_cast_op, input_columns="target_eos_mask") # apply batch operations ds = ds.batch(batch_size, drop_remainder=True) ds.channel_name = 'transformer' diff --git a/model_zoo/official/nlp/transformer/src/dataset.py b/model_zoo/official/nlp/transformer/src/dataset.py index 84dc5427b2..4eb3858bde 100644 --- a/model_zoo/official/nlp/transformer/src/dataset.py +++ b/model_zoo/official/nlp/transformer/src/dataset.py @@ -30,12 +30,12 @@ def create_transformer_dataset(epoch_count=1, rank_size=1, rank_id=0, do_shuffle shuffle=(do_shuffle == "true"), num_shards=rank_size, shard_id=rank_id) type_cast_op = deC.TypeCast(mstype.int32) - ds = ds.map(input_columns="source_eos_ids", operations=type_cast_op) - ds = ds.map(input_columns="source_eos_mask", operations=type_cast_op) - ds = ds.map(input_columns="target_sos_ids", operations=type_cast_op) - ds = ds.map(input_columns="target_sos_mask", operations=type_cast_op) - ds = ds.map(input_columns="target_eos_ids", operations=type_cast_op) - ds = ds.map(input_columns="target_eos_mask", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="source_eos_ids") + ds = ds.map(operations=type_cast_op, input_columns="source_eos_mask") + ds = ds.map(operations=type_cast_op, input_columns="target_sos_ids") + ds = ds.map(operations=type_cast_op, input_columns="target_sos_mask") + ds = ds.map(operations=type_cast_op, input_columns="target_eos_ids") + ds = ds.map(operations=type_cast_op, input_columns="target_eos_mask") # apply batch operations ds = ds.batch(transformer_net_cfg.batch_size, drop_remainder=True) diff --git a/tests/st/mem_reuse/resnet_cifar_memreuse.py 
b/tests/st/mem_reuse/resnet_cifar_memreuse.py index a7ca83cbf7..e4b12478ed 100644 --- a/tests/st/mem_reuse/resnet_cifar_memreuse.py +++ b/tests/st/mem_reuse/resnet_cifar_memreuse.py @@ -89,8 +89,8 @@ def create_dataset(repeat_num=1, training=True): changeswap_op] # apply map operations on images - ds = ds.map(input_columns="label", operations=type_cast_op) - ds = ds.map(input_columns="image", operations=c_trans) + ds = ds.map(operations=type_cast_op, input_columns="label") + ds = ds.map(operations=c_trans, input_columns="image") # apply repeat operations ds = ds.repeat(repeat_num) diff --git a/tests/st/mem_reuse/resnet_cifar_normal.py b/tests/st/mem_reuse/resnet_cifar_normal.py index e5746a254d..5677a6d730 100644 --- a/tests/st/mem_reuse/resnet_cifar_normal.py +++ b/tests/st/mem_reuse/resnet_cifar_normal.py @@ -89,8 +89,8 @@ def create_dataset(repeat_num=1, training=True): changeswap_op] # apply map operations on images - ds = ds.map(input_columns="label", operations=type_cast_op) - ds = ds.map(input_columns="image", operations=c_trans) + ds = ds.map(operations=type_cast_op, input_columns="label") + ds = ds.map(operations=c_trans, input_columns="image") # apply repeat operations ds = ds.repeat(repeat_num) diff --git a/tests/st/model_zoo_tests/yolov3/src/dataset.py b/tests/st/model_zoo_tests/yolov3/src/dataset.py index 712fe9bb4b..f002ea46d3 100644 --- a/tests/st/model_zoo_tests/yolov3/src/dataset.py +++ b/tests/st/model_zoo_tests/yolov3/src/dataset.py @@ -298,21 +298,21 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank, num_parallel_workers=num_parallel_workers, shuffle=False) decode = C.Decode() - ds = ds.map(input_columns=["image"], operations=decode) + ds = ds.map(operations=decode, input_columns=["image"]) compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training)) if is_training: hwc_to_chw = C.HWC2CHW() - ds = ds.map(input_columns=["image", "annotation"], + ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], - operations=compose_map_func, num_parallel_workers=num_parallel_workers) - ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers) + num_parallel_workers=num_parallel_workers) + ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) ds = ds.repeat(repeat_num) else: - ds = ds.map(input_columns=["image", "annotation"], + ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], output_columns=["image", "image_shape", "annotation"], column_order=["image", "image_shape", "annotation"], - operations=compose_map_func, num_parallel_workers=num_parallel_workers) + num_parallel_workers=num_parallel_workers) return ds diff --git a/tests/st/networks/models/bert/bert_performance/test_bert_tdt_lossscale.py b/tests/st/networks/models/bert/bert_performance/test_bert_tdt_lossscale.py index ab169e975e..13bb8cef99 100644 --- a/tests/st/networks/models/bert/bert_performance/test_bert_tdt_lossscale.py +++ b/tests/st/networks/models/bert/bert_performance/test_bert_tdt_lossscale.py @@ -102,12 +102,12 @@ def me_de_train_dataset(sink_mode=False): if sink_mode: 
sink_size = 100 new_repeat_count = 3 - ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) - ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) - ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") + ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") + ds = ds.map(operations=type_cast_op, input_columns="segment_ids") + ds = ds.map(operations=type_cast_op, input_columns="input_mask") + ds = ds.map(operations=type_cast_op, input_columns="input_ids") # apply batch operations ds = ds.batch(batch_size, drop_remainder=True) logger.info("data size: {}".format(ds.get_dataset_size())) diff --git a/tests/st/networks/models/bert/bert_precision/test_bert_tdt_lossscale.py b/tests/st/networks/models/bert/bert_precision/test_bert_tdt_lossscale.py index ad904e5e8a..a8bada0257 100644 --- a/tests/st/networks/models/bert/bert_precision/test_bert_tdt_lossscale.py +++ b/tests/st/networks/models/bert/bert_precision/test_bert_tdt_lossscale.py @@ -102,12 +102,12 @@ def me_de_train_dataset(sink_mode=False): if sink_mode: sink_size = 100 new_repeat_count = 3 - ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) - ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) - ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") + ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") + ds = ds.map(operations=type_cast_op, input_columns="segment_ids") + ds = ds.map(operations=type_cast_op, input_columns="input_mask") + ds = ds.map(operations=type_cast_op, input_columns="input_ids") # apply batch operations ds = ds.batch(batch_size, drop_remainder=True) logger.info("data size: {}".format(ds.get_dataset_size())) diff --git a/tests/st/networks/models/bert/src/dataset.py b/tests/st/networks/models/bert/src/dataset.py index 7805d66fdb..6a33a6b584 100644 --- a/tests/st/networks/models/bert/src/dataset.py +++ b/tests/st/networks/models/bert/src/dataset.py @@ -41,12 +41,12 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", d print('origin dataset size: ', ori_dataset_size) new_repeat_count = int(repeat_count * ori_dataset_size // ds.get_dataset_size()) type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) - ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) - ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") + ds = 
ds.map(operations=type_cast_op, input_columns="next_sentence_labels") + ds = ds.map(operations=type_cast_op, input_columns="segment_ids") + ds = ds.map(operations=type_cast_op, input_columns="input_mask") + ds = ds.map(operations=type_cast_op, input_columns="input_ids") # apply batch operations ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True) ds = ds.repeat(max(new_repeat_count, repeat_count)) diff --git a/tests/st/networks/models/bert/test_bert_graph_kernel.py b/tests/st/networks/models/bert/test_bert_graph_kernel.py index 469871a777..47d8f5d246 100644 --- a/tests/st/networks/models/bert/test_bert_graph_kernel.py +++ b/tests/st/networks/models/bert/test_bert_graph_kernel.py @@ -92,12 +92,12 @@ def me_de_train_dataset(): "next_sentence_labels", "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"], shuffle=False) type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) - ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) - ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") + ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") + ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") + ds = ds.map(operations=type_cast_op, input_columns="segment_ids") + ds = ds.map(operations=type_cast_op, input_columns="input_mask") + ds = ds.map(operations=type_cast_op, input_columns="input_ids") # apply batch operations batch_size = int(os.getenv('BATCH_SIZE', '16')) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/tests/st/networks/models/deeplabv3/src/md_dataset.py b/tests/st/networks/models/deeplabv3/src/md_dataset.py index 14ffb61d8a..21f8f70db9 100644 --- a/tests/st/networks/models/deeplabv3/src/md_dataset.py +++ b/tests/st/networks/models/deeplabv3/src/md_dataset.py @@ -97,10 +97,10 @@ def create_dataset(args, data_url, epoch_num=1, batch_size=1, usage="train", shu # wrapped with GeneratorDataset dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=None) - dataset = dataset.map(input_columns=["image", "label"], operations=DataTransform(args, usage=usage)) + dataset = dataset.map(operations=DataTransform(args, usage=usage), input_columns=["image", "label"]) channelswap_op = C.HWC2CHW() - dataset = dataset.map(input_columns="image", operations=channelswap_op) + dataset = dataset.map(operations=channelswap_op, input_columns="image") # 1464 samples / batch_size 8 = 183 batches # epoch_num is num of steps diff --git a/tests/st/networks/models/resnet50/src/dataset.py b/tests/st/networks/models/resnet50/src/dataset.py index a5f3f61ad5..c65824166e 100755 --- a/tests/st/networks/models/resnet50/src/dataset.py +++ b/tests/st/networks/models/resnet50/src/dataset.py @@ -68,8 +68,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): type_cast_op = C2.TypeCast(mstype.int32) - ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) - ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) + ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8) + ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) # apply batch operations ds = 
ds.batch(batch_size, drop_remainder=True) diff --git a/tests/st/networks/models/resnet50/src_thor/dataset.py b/tests/st/networks/models/resnet50/src_thor/dataset.py index b63e0a0de7..8179bdd173 100644 --- a/tests/st/networks/models/resnet50/src_thor/dataset.py +++ b/tests/st/networks/models/resnet50/src_thor/dataset.py @@ -71,8 +71,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): type_cast_op = C2.TypeCast(mstype.int32) - ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) - ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) + ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8) + ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) # apply batch operations ds = ds.batch(batch_size, drop_remainder=True) diff --git a/tests/st/networks/test_gpu_lenet.py b/tests/st/networks/test_gpu_lenet.py index 77f42cb9f2..58ab2a99a2 100644 --- a/tests/st/networks/test_gpu_lenet.py +++ b/tests/st/networks/test_gpu_lenet.py @@ -171,11 +171,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, type_cast_op = C.TypeCast(mstype.int32) # apply map operations on images - mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # apply DatasetOps buffer_size = 10000 diff --git a/tests/st/ops/ascend/test_tdt_data_ms.py b/tests/st/ops/ascend/test_tdt_data_ms.py index c0e1bf69aa..8dd3d4e691 100644 --- a/tests/st/ops/ascend/test_tdt_data_ms.py +++ b/tests/st/ops/ascend/test_tdt_data_ms.py @@ -47,11 +47,11 @@ def test_me_de_train_dataset(): rescale_op = vision.Rescale(rescale, shift) # apply map operations on images - data_set_new = data_set_new.map(input_columns="image/encoded", operations=decode_op) - data_set_new = data_set_new.map(input_columns="image/encoded", operations=resize_op) - data_set_new = data_set_new.map(input_columns="image/encoded", operations=rescale_op) + data_set_new = data_set_new.map(operations=decode_op, input_columns="image/encoded") + data_set_new = data_set_new.map(operations=resize_op, input_columns="image/encoded") + data_set_new = data_set_new.map(operations=rescale_op, input_columns="image/encoded") hwc2chw_op = vision.HWC2CHW() - data_set_new = data_set_new.map(input_columns="image/encoded", operations=hwc2chw_op) + data_set_new = data_set_new.map(operations=hwc2chw_op, input_columns="image/encoded") data_set_new = data_set_new.repeat(1) # apply batch 
operations batch_size_new = 32 diff --git a/tests/st/probability/dataset.py b/tests/st/probability/dataset.py index dca3a6af8a..df9eecda1f 100644 --- a/tests/st/probability/dataset.py +++ b/tests/st/probability/dataset.py @@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, type_cast_op = C.TypeCast(mstype.int32) # apply map operations on images - mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # apply DatasetOps buffer_size = 10000 diff --git a/tests/st/probability/test_gpu_svi_cvae.py b/tests/st/probability/test_gpu_svi_cvae.py index 09700a14ec..8dde2b3542 100644 --- a/tests/st/probability/test_gpu_svi_cvae.py +++ b/tests/st/probability/test_gpu_svi_cvae.py @@ -87,9 +87,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, hwc2chw_op = CV.HWC2CHW() # apply map operations on images - mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # apply DatasetOps mnist_ds = mnist_ds.batch(batch_size) diff --git a/tests/st/probability/test_gpu_svi_vae.py b/tests/st/probability/test_gpu_svi_vae.py index b5ac6537c1..dd338cbc01 100644 --- a/tests/st/probability/test_gpu_svi_vae.py +++ b/tests/st/probability/test_gpu_svi_vae.py @@ -77,9 +77,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, hwc2chw_op = CV.HWC2CHW() # apply map operations on images - mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = 
mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # apply DatasetOps mnist_ds = mnist_ds.batch(batch_size) diff --git a/tests/st/probability/test_gpu_vae_gan.py b/tests/st/probability/test_gpu_vae_gan.py index b6bff57635..ac0eeb07ff 100644 --- a/tests/st/probability/test_gpu_vae_gan.py +++ b/tests/st/probability/test_gpu_vae_gan.py @@ -145,9 +145,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, hwc2chw_op = CV.HWC2CHW() # apply map operations on images - mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # apply DatasetOps mnist_ds = mnist_ds.batch(batch_size) diff --git a/tests/st/probability/test_uncertainty.py b/tests/st/probability/test_uncertainty.py index c36b93f9e1..2131c53a9b 100644 --- a/tests/st/probability/test_uncertainty.py +++ b/tests/st/probability/test_uncertainty.py @@ -98,11 +98,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, type_cast_op = C.TypeCast(mstype.int32) # apply map operations on images - mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # apply DatasetOps buffer_size = 10000 diff --git a/tests/st/ps/full_ps/test_full_ps_lenet.py b/tests/st/ps/full_ps/test_full_ps_lenet.py index 7693924c8c..c8a0294e72 100644 --- a/tests/st/ps/full_ps/test_full_ps_lenet.py +++ b/tests/st/ps/full_ps/test_full_ps_lenet.py @@ -107,11 +107,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, type_cast_op = C.TypeCast(mstype.int32) # apply map operations on images - mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, 
num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # apply DatasetOps buffer_size = 10000 diff --git a/tests/st/pynative/test_pynative_resnet50.py b/tests/st/pynative/test_pynative_resnet50.py index 5122ce3349..ace4676dad 100644 --- a/tests/st/pynative/test_pynative_resnet50.py +++ b/tests/st/pynative/test_pynative_resnet50.py @@ -351,8 +351,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32): changeswap_op] # apply map operations on images - data_set = data_set.map(input_columns="label", operations=type_cast_op) - data_set = data_set.map(input_columns="image", operations=c_trans) + data_set = data_set.map(operations=type_cast_op, input_columns="label") + data_set = data_set.map(operations=c_trans, input_columns="image") # apply shuffle operations data_set = data_set.shuffle(buffer_size=1000) diff --git a/tests/st/quantization/lenet_quant/dataset.py b/tests/st/quantization/lenet_quant/dataset.py index dca3a6af8a..df9eecda1f 100644 --- a/tests/st/quantization/lenet_quant/dataset.py +++ b/tests/st/quantization/lenet_quant/dataset.py @@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, type_cast_op = C.TypeCast(mstype.int32) # apply map operations on images - mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # apply DatasetOps buffer_size = 10000 diff --git a/tests/st/summary/test_summary.py b/tests/st/summary/test_summary.py index 1989c93e93..205c6b2d5e 100644 --- a/tests/st/summary/test_summary.py +++ b/tests/st/summary/test_summary.py @@ -114,11 +114,11 @@ 
def create_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers type_cast_op = C.TypeCast(mstype.int32) # apply map operations on images - mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # apply DatasetOps mnist_ds = mnist_ds.shuffle(buffer_size=10000) # 10000 as in LeNet train script diff --git a/tests/st/tbe_networks/resnet_cifar.py b/tests/st/tbe_networks/resnet_cifar.py index ac835052f6..470a1e8daa 100644 --- a/tests/st/tbe_networks/resnet_cifar.py +++ b/tests/st/tbe_networks/resnet_cifar.py @@ -90,8 +90,8 @@ def create_dataset(repeat_num=1, training=True): changeswap_op] # apply map operations on images - data_set = data_set.map(input_columns="label", operations=type_cast_op) - data_set = data_set.map(input_columns="image", operations=c_trans) + data_set = data_set.map(operations=type_cast_op, input_columns="label") + data_set = data_set.map(operations=c_trans, input_columns="image") # apply repeat operations data_set = data_set.repeat(repeat_num) diff --git a/tests/st/tbe_networks/test_resnet_cifar_1p.py b/tests/st/tbe_networks/test_resnet_cifar_1p.py index de4f93e2b5..d30b315918 100644 --- a/tests/st/tbe_networks/test_resnet_cifar_1p.py +++ b/tests/st/tbe_networks/test_resnet_cifar_1p.py @@ -68,8 +68,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32): changeswap_op] # apply map operations on images - data_set = data_set.map(input_columns="label", operations=type_cast_op) - data_set = data_set.map(input_columns="image", operations=c_trans) + data_set = data_set.map(operations=type_cast_op, input_columns="label") + data_set = data_set.map(operations=c_trans, input_columns="image") # apply shuffle operations data_set = data_set.shuffle(buffer_size=1000) diff --git a/tests/st/tbe_networks/test_resnet_cifar_8p.py b/tests/st/tbe_networks/test_resnet_cifar_8p.py index 482be8dca5..410bda1571 100644 --- a/tests/st/tbe_networks/test_resnet_cifar_8p.py +++ b/tests/st/tbe_networks/test_resnet_cifar_8p.py @@ -79,8 +79,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32, rank_id=0, rank_s changeswap_op] # apply map operations on images - data_set = data_set.map(input_columns="label", operations=type_cast_op) - data_set = data_set.map(input_columns="image", operations=c_trans) + data_set = data_set.map(operations=type_cast_op, input_columns="label") + data_set = data_set.map(operations=c_trans, input_columns="image") # apply shuffle operations data_set = 
data_set.shuffle(buffer_size=1000) diff --git a/tests/ut/data/dataset/testPyfuncMap/pyfuncmap.py b/tests/ut/data/dataset/testPyfuncMap/pyfuncmap.py index efc66cff70..3b200da870 100644 --- a/tests/ut/data/dataset/testPyfuncMap/pyfuncmap.py +++ b/tests/ut/data/dataset/testPyfuncMap/pyfuncmap.py @@ -29,7 +29,7 @@ def test_case_0(): # apply dataset operations ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - ds1 = ds1.map(input_columns=col, output_columns="out", operations=(lambda x: x + x)) + ds1 = ds1.map(operations=(lambda x: x + x), input_columns=col, output_columns="out") print("************** Output Tensor *****************") for data in ds1.create_dict_iterator(): # each data is a dictionary @@ -49,7 +49,7 @@ def test_case_1(): # apply dataset operations ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1"], operations=(lambda x: (x, x + x))) + ds1 = ds1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"]) print("************** Output Tensor *****************") for data in ds1.create_dict_iterator(): # each data is a dictionary @@ -72,7 +72,7 @@ def test_case_2(): # apply dataset operations ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - ds1 = ds1.map(input_columns=col, output_columns="out", operations=(lambda x, y: x + y)) + ds1 = ds1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out") print("************** Output Tensor *****************") for data in ds1.create_dict_iterator(): # each data is a dictionary @@ -93,8 +93,8 @@ def test_case_3(): # apply dataset operations ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1", "out2"], - operations=(lambda x, y: (x, x + y, x + x + y))) + ds1 = ds1.map(operations=(lambda x, y: (x, x + y, x + x + y)), input_columns=col, + output_columns=["out0", "out1", "out2"]) print("************** Output Tensor *****************") for data in ds1.create_dict_iterator(): # each data is a dictionary @@ -119,8 +119,8 @@ def test_case_4(): # apply dataset operations ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1", "out2"], num_parallel_workers=4, - operations=(lambda x, y: (x, x + y, x + x + y))) + ds1 = ds1.map(operations=(lambda x, y: (x, x + y, x + x + y)), input_columns=col, + output_columns=["out0", "out1", "out2"], num_parallel_workers=4) print("************** Output Tensor *****************") for data in ds1.create_dict_iterator(): # each data is a dictionary diff --git a/tests/ut/python/dataset/test_HWC2CHW.py b/tests/ut/python/dataset/test_HWC2CHW.py index 5b173fd319..0c576ee112 100644 --- a/tests/ut/python/dataset/test_HWC2CHW.py +++ b/tests/ut/python/dataset/test_HWC2CHW.py @@ -39,12 +39,12 @@ def test_HWC2CHW(plot=False): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() hwc2chw_op = c_vision.HWC2CHW() - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=hwc2chw_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=hwc2chw_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=decode_op) + data2 = 
data2.map(operations=decode_op, input_columns=["image"]) image_transposed = [] image = [] @@ -72,8 +72,8 @@ def test_HWC2CHW_md5(): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() hwc2chw_op = c_vision.HWC2CHW() - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=hwc2chw_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=hwc2chw_op, input_columns=["image"]) # Compare with expected md5 from images filename = "HWC2CHW_01_result.npz" @@ -90,8 +90,8 @@ def test_HWC2CHW_comp(plot=False): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() hwc2chw_op = c_vision.HWC2CHW() - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=hwc2chw_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=hwc2chw_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -101,7 +101,7 @@ def test_HWC2CHW_comp(plot=False): py_vision.HWC2CHW() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) image_c_transposed = [] image_py_transposed = [] diff --git a/tests/ut/python/dataset/test_autocontrast.py b/tests/ut/python/dataset/test_autocontrast.py index 8c0bf951e4..6055ce90ea 100644 --- a/tests/ut/python/dataset/test_autocontrast.py +++ b/tests/ut/python/dataset/test_autocontrast.py @@ -42,8 +42,7 @@ def test_auto_contrast_py(plot=False): F.Resize((224, 224)), F.ToTensor()]) - ds_original = ds.map(input_columns="image", - operations=transforms_original) + ds_original = ds.map(operations=transforms_original, input_columns="image") ds_original = ds_original.batch(512) @@ -64,8 +63,7 @@ def test_auto_contrast_py(plot=False): F.AutoContrast(cutoff=10.0, ignore=[10, 20]), F.ToTensor()]) - ds_auto_contrast = ds.map(input_columns="image", - operations=transforms_auto_contrast) + ds_auto_contrast = ds.map(operations=transforms_auto_contrast, input_columns="image") ds_auto_contrast = ds_auto_contrast.batch(512) @@ -99,17 +97,14 @@ def test_auto_contrast_c(plot=False): # AutoContrast Images ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((224, 224))]) + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20]) c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20]) transforms_op = mindspore.dataset.transforms.py_transforms.Compose([lambda img: F.ToPIL()(img.astype(np.uint8)), python_op, np.array]) - ds_auto_contrast_py = ds.map(input_columns="image", - operations=transforms_op) + ds_auto_contrast_py = ds.map(operations=transforms_op, input_columns="image") ds_auto_contrast_py = ds_auto_contrast_py.batch(512) @@ -122,12 +117,9 @@ def test_auto_contrast_c(plot=False): axis=0) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((224, 224))]) + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) - ds_auto_contrast_c = ds.map(input_columns="image", 
- operations=c_op) + ds_auto_contrast_c = ds.map(operations=c_op, input_columns="image") ds_auto_contrast_c = ds_auto_contrast_c.batch(512) @@ -162,9 +154,7 @@ def test_auto_contrast_one_channel_c(plot=False): # AutoContrast Images ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((224, 224))]) + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) python_op = F.AutoContrast() c_op = C.AutoContrast() # not using F.ToTensor() since it converts to floats @@ -174,8 +164,7 @@ def test_auto_contrast_one_channel_c(plot=False): python_op, np.array]) - ds_auto_contrast_py = ds.map(input_columns="image", - operations=transforms_op) + ds_auto_contrast_py = ds.map(operations=transforms_op, input_columns="image") ds_auto_contrast_py = ds_auto_contrast_py.batch(512) @@ -188,13 +177,10 @@ def test_auto_contrast_one_channel_c(plot=False): axis=0) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((224, 224)), - lambda img: np.array(img[:, :, 0])]) + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])], + input_columns=["image"]) - ds_auto_contrast_c = ds.map(input_columns="image", - operations=c_op) + ds_auto_contrast_c = ds.map(operations=c_op, input_columns="image") ds_auto_contrast_c = ds_auto_contrast_c.batch(512) @@ -223,8 +209,7 @@ def test_auto_contrast_mnist_c(plot=False): """ logger.info("Test AutoContrast C Op With MNIST Images") ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) - ds_auto_contrast_c = ds.map(input_columns="image", - operations=C.AutoContrast(cutoff=1, ignore=(0, 255))) + ds_auto_contrast_c = ds.map(operations=C.AutoContrast(cutoff=1, ignore=(0, 255)), input_columns="image") ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) images = [] @@ -252,25 +237,20 @@ def test_auto_contrast_invalid_ignore_param_c(): logger.info("Test AutoContrast C Op with invalid ignore parameter") try: ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), - lambda img: np.array(img[:, :, 0])]) + lambda img: np.array(img[:, :, 0])], input_columns=["image"]) # invalid ignore - ds = ds.map(input_columns="image", - operations=C.AutoContrast(ignore=255.5)) + ds = ds.map(operations=C.AutoContrast(ignore=255.5), input_columns="image") except TypeError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "Argument ignore with value 255.5 is not of type" in str(error) try: ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((224, 224)), - lambda img: np.array(img[:, :, 0])]) + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), + lambda img: np.array(img[:, :, 0])], input_columns=["image"]) # invalid ignore - ds = ds.map(input_columns="image", - operations=C.AutoContrast(ignore=(10, 100))) + ds = ds.map(operations=C.AutoContrast(ignore=(10, 100)), input_columns="image") except TypeError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "Argument ignore with value (10,100) is not of type" in str(error) @@ -283,25 +263,21 @@ def test_auto_contrast_invalid_cutoff_param_c(): logger.info("Test AutoContrast C Op with invalid cutoff 
parameter") try: ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), - lambda img: np.array(img[:, :, 0])]) + lambda img: np.array(img[:, :, 0])], input_columns=["image"]) # invalid ignore - ds = ds.map(input_columns="image", - operations=C.AutoContrast(cutoff=-10.0)) + ds = ds.map(operations=C.AutoContrast(cutoff=-10.0), input_columns="image") except ValueError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) try: ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), - lambda img: np.array(img[:, :, 0])]) + lambda img: np.array(img[:, :, 0])], input_columns=["image"]) # invalid ignore - ds = ds.map(input_columns="image", - operations=C.AutoContrast(cutoff=120.0)) + ds = ds.map(operations=C.AutoContrast(cutoff=120.0), input_columns="image") except ValueError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) @@ -314,21 +290,21 @@ def test_auto_contrast_invalid_ignore_param_py(): logger.info("Test AutoContrast python Op with invalid ignore parameter") try: ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), + ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), F.Resize((224, 224)), F.AutoContrast(ignore=255.5), - F.ToTensor()])]) + F.ToTensor()])], + input_columns=["image"]) except TypeError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "Argument ignore with value 255.5 is not of type" in str(error) try: ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), + ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), F.Resize((224, 224)), F.AutoContrast(ignore=(10, 100)), - F.ToTensor()])]) + F.ToTensor()])], + input_columns=["image"]) except TypeError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "Argument ignore with value (10,100) is not of type" in str(error) @@ -341,21 +317,22 @@ def test_auto_contrast_invalid_cutoff_param_py(): logger.info("Test AutoContrast python Op with invalid cutoff parameter") try: ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), + ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), F.Resize((224, 224)), F.AutoContrast(cutoff=-10.0), - F.ToTensor()])]) + F.ToTensor()])], + input_columns=["image"]) except ValueError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "Input cutoff is not within the required interval of (0 to 100)." 
in str(error) try: ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), - F.Resize((224, 224)), - F.AutoContrast(cutoff=120.0), - F.ToTensor()])]) + ds = ds.map( + operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), + F.Resize((224, 224)), + F.AutoContrast(cutoff=120.0), + F.ToTensor()])], + input_columns=["image"]) except ValueError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) diff --git a/tests/ut/python/dataset/test_bounding_box_augment.py b/tests/ut/python/dataset/test_bounding_box_augment.py index 502c1184ac..a93311dd16 100644 --- a/tests/ut/python/dataset/test_bounding_box_augment.py +++ b/tests/ut/python/dataset/test_bounding_box_augment.py @@ -49,10 +49,9 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False): test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) filename = "bounding_box_augment_rotation_c_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -88,10 +87,9 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False): test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) filename = "bounding_box_augment_crop_c_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -126,10 +124,9 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False): test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) # Add column for "bbox" + column_order=["image", "bbox"]) # Add column for "bbox" filename = "bounding_box_augment_valid_ratio_c_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -163,10 +160,9 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False): test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1) - dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] @@ -195,20 +191,19 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False): # map to apply ops # Add column for "bbox" - dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"], + dataVoc1 = dataVoc1.map( + operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)), + input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) + 
diff --git a/tests/ut/python/dataset/test_bounding_box_augment.py b/tests/ut/python/dataset/test_bounding_box_augment.py
index 502c1184ac..a93311dd16 100644
--- a/tests/ut/python/dataset/test_bounding_box_augment.py
+++ b/tests/ut/python/dataset/test_bounding_box_augment.py
@@ -49,10 +49,9 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False):
     test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1)

     # map to apply ops
-    dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
+    dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
                             output_columns=["image", "bbox"],
-                            column_order=["image", "bbox"],
-                            operations=[test_op])
+                            column_order=["image", "bbox"])

     filename = "bounding_box_augment_rotation_c_result.npz"
     save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@@ -88,10 +87,9 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False):
     test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9)

     # map to apply ops
-    dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
+    dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
                             output_columns=["image", "bbox"],
-                            column_order=["image", "bbox"],
-                            operations=[test_op])
+                            column_order=["image", "bbox"])

     filename = "bounding_box_augment_crop_c_result.npz"
     save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@@ -126,10 +124,9 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False):
     test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9)

     # map to apply ops
-    dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
+    dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
                             output_columns=["image", "bbox"],
-                            column_order=["image", "bbox"],
-                            operations=[test_op])  # Add column for "bbox"
+                            column_order=["image", "bbox"])  # Add column for "bbox"

     filename = "bounding_box_augment_valid_ratio_c_result.npz"
     save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@@ -163,10 +160,9 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False):

     test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1)

-    dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"],
+    dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"],
                               output_columns=["image", "bbox"],
-                              column_order=["image", "bbox"],
-                              operations=[test_op])
+                              column_order=["image", "bbox"])

     unaugSamp, augSamp = [], []

@@ -195,20 +191,19 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False):

     # map to apply ops
     # Add column for "bbox"
-    dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"],
+    dataVoc1 = dataVoc1.map(
+        operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)),
+        input_columns=["image", "bbox"],
+        output_columns=["image", "bbox"],
+        column_order=["image", "bbox"])
+    dataVoc2 = dataVoc2.map(
+        operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)),
+        input_columns=["image", "bbox"],
+        output_columns=["image", "bbox"],
+        column_order=["image", "bbox"])
+    dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
                             output_columns=["image", "bbox"],
-                            column_order=["image", "bbox"],
-                            operations=lambda img, bbox:
-                            (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)))
-    dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
-                            output_columns=["image", "bbox"],
-                            column_order=["image", "bbox"],
-                            operations=lambda img, bbox:
-                            (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)))
-    dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
-                            output_columns=["image", "bbox"],
-                            column_order=["image", "bbox"],
-                            operations=[test_op])
+                            column_order=["image", "bbox"])

     filename = "bounding_box_augment_valid_edge_c_result.npz"
     save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@@ -238,10 +233,9 @@ def test_bounding_box_augment_invalid_ratio_c():
         # ratio range is from 0 - 1
         test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1.5)
         # map to apply ops
-        dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
+        dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
                                 output_columns=["image", "bbox"],
-                                column_order=["image", "bbox"],
-                                operations=[test_op])  # Add column for "bbox"
+                                column_order=["image", "bbox"])  # Add column for "bbox"
     except ValueError as error:
         logger.info("Got an exception in DE: {}".format(str(error)))
         assert "Input ratio is not within the required interval of (0.0 to 1.0)." in str(error)
diff --git a/tests/ut/python/dataset/test_c_compose.py b/tests/ut/python/dataset/test_c_compose.py
index 2119bac78f..4eb851daa4 100644
--- a/tests/ut/python/dataset/test_c_compose.py
+++ b/tests/ut/python/dataset/test_c_compose.py
@@ -25,7 +25,7 @@ def test_compose():
     def test_config(arr, op_list):
         try:
             data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
-            data = data.map(input_columns=["col"], operations=ops.Compose(op_list))
+            data = data.map(operations=ops.Compose(op_list), input_columns=["col"])
             res = []
             for i in data.create_dict_iterator(num_epochs=1):
                 res.append(i["col"].tolist())
diff --git a/tests/ut/python/dataset/test_c_random_apply.py b/tests/ut/python/dataset/test_c_random_apply.py
index dfa96b9767..29c64d55af 100644
--- a/tests/ut/python/dataset/test_c_random_apply.py
+++ b/tests/ut/python/dataset/test_c_random_apply.py
@@ -24,7 +24,7 @@ def test_random_apply():
     def test_config(arr, op_list, prob=0.5):
         try:
             data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
-            data = data.map(input_columns=["col"], operations=ops.RandomApply(op_list, prob))
+            data = data.map(operations=ops.RandomApply(op_list, prob), input_columns=["col"])
             res = []
             for i in data.create_dict_iterator(num_epochs=1):
                 res.append(i["col"].tolist())
diff --git a/tests/ut/python/dataset/test_cache_map.py b/tests/ut/python/dataset/test_cache_map.py
index ac449b11d6..7640fd8bf0 100644
--- a/tests/ut/python/dataset/test_cache_map.py
+++ b/tests/ut/python/dataset/test_cache_map.py
@@ -48,7 +48,7 @@ def test_cache_map_basic1():
     # This DATA_DIR only has 2 images in it
     ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
     decode_op = c_vision.Decode()
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"])
     ds1 = ds1.repeat(4)

     filename = "cache_map_01_result.npz"
@@ -77,7 +77,7 @@ def test_cache_map_basic2():
     # This DATA_DIR only has 2 images in it
     ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
     decode_op = c_vision.Decode()
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
     ds1 = ds1.repeat(4)

     filename = "cache_map_02_result.npz"
@@ -107,7 +107,7 @@ def test_cache_map_basic3():
     ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
     decode_op = c_vision.Decode()
     ds1 = ds1.repeat(4)
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
     logger.info("ds1.dataset_size is ", ds1.get_dataset_size())

     num_iter = 0
@@ -131,7 +131,7 @@ def test_cache_map_basic4():
     ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
     decode_op = c_vision.Decode()
     ds1 = ds1.repeat(4)
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"])
     logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
     shape = ds1.output_shapes()
     logger.info(shape)
@@ -167,7 +167,7 @@ def test_cache_map_failure1():
     # This DATA_DIR only has 2 images in it
     ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
     decode_op = c_vision.Decode()
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
     ds1 = ds1.repeat(4)

     try:
diff --git a/tests/ut/python/dataset/test_cache_nomap.py b/tests/ut/python/dataset/test_cache_nomap.py
index 6688737f43..df26859444 100644
--- a/tests/ut/python/dataset/test_cache_nomap.py
+++ b/tests/ut/python/dataset/test_cache_nomap.py
@@ -108,7 +108,7 @@ def test_cache_nomap_basic3():
     some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
     ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False, cache=some_cache)
     decode_op = c_vision.Decode()
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"])
     ds1 = ds1.repeat(4)

     num_iter = 0
@@ -160,7 +160,7 @@ def test_cache_nomap_basic4():
     ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL)
     decode_op = c_vision.Decode()
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
     ds1 = ds1.repeat(4)

     num_iter = 0
@@ -197,7 +197,7 @@ def test_cache_nomap_basic5():
     some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
     ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], cache=some_cache)
     decode_op = c_vision.Decode()
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"])
     ds1 = ds1.repeat(4)

     num_iter = 0
@@ -237,7 +237,7 @@ def test_cache_nomap_basic6():
     # there was not any cache.
     ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_shards=3, shard_id=1, cache=some_cache)
     decode_op = c_vision.Decode()
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"])
     ds1 = ds1.repeat(4)

     num_iter = 0
@@ -273,7 +273,7 @@ def test_cache_nomap_basic7():
     ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL,
                              cache=some_cache)
     decode_op = c_vision.Decode()
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"])
     ds1 = ds1.repeat(4)

     num_iter = 0
@@ -343,11 +343,11 @@ def test_cache_nomap_allowed_share2():
     decode_op = c_vision.Decode()

     ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
     ds1 = ds1.repeat(4)

     ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache)
+    ds2 = ds2.map(operations=decode_op, input_columns=["image"], cache=some_cache)
     ds2 = ds2.shuffle(buffer_size=2)

     num_iter = 0
@@ -418,10 +418,10 @@ def test_cache_nomap_allowed_share4():
     decode_op = c_vision.Decode()

     ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache, num_parallel_workers=1)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache, num_parallel_workers=1)

     ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache, num_parallel_workers=2)
+    ds2 = ds2.map(operations=decode_op, input_columns=["image"], cache=some_cache, num_parallel_workers=2)

     num_iter = 0
     for _ in ds1.create_dict_iterator(num_epochs=1):
@@ -458,10 +458,10 @@ def test_cache_nomap_disallowed_share1():
     rescale_op = c_vision.Rescale(1.0 / 255.0, -1.0)

     ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)

     ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    ds2 = ds2.map(input_columns=["image"], operations=rescale_op, cache=some_cache)
+    ds2 = ds2.map(operations=rescale_op, input_columns=["image"], cache=some_cache)

     num_iter = 0
     for _ in ds1.create_dict_iterator(num_epochs=1):
diff --git a/tests/ut/python/dataset/test_center_crop.py b/tests/ut/python/dataset/test_center_crop.py
index 786af70d23..fd07934fc9 100644
--- a/tests/ut/python/dataset/test_center_crop.py
+++ b/tests/ut/python/dataset/test_center_crop.py
@@ -40,12 +40,12 @@ def test_center_crop_op(height=375, width=375, plot=False):
     decode_op = vision.Decode()
     # 3 images [375, 500] [600, 500] [512, 512]
     center_crop_op = vision.CenterCrop([height, width])
-    data1 = data1.map(input_columns=["image"], operations=decode_op)
-    data1 = data1.map(input_columns=["image"], operations=center_crop_op)
+    data1 = data1.map(operations=decode_op, input_columns=["image"])
+    data1 = data1.map(operations=center_crop_op, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
-    data2 = data2.map(input_columns=["image"], operations=decode_op)
+    data2 = data2.map(operations=decode_op, input_columns=["image"])

     image_cropped = []
     image = []
@@ -67,8 +67,8 @@ def test_center_crop_md5(height=375, width=375):
     decode_op = vision.Decode()
     # 3 images [375, 500] [600, 500] [512, 512]
     center_crop_op = vision.CenterCrop([height, width])
-    data1 = data1.map(input_columns=["image"], operations=decode_op)
-    data1 = data1.map(input_columns=["image"], operations=center_crop_op)
+    data1 = data1.map(operations=decode_op, input_columns=["image"])
+    data1 = data1.map(operations=center_crop_op, input_columns=["image"])
     # Compare with expected md5 from images
     filename = "center_crop_01_result.npz"
     save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
@@ -84,8 +84,8 @@ def test_center_crop_comp(height=375, width=375, plot=False):
     data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
     decode_op = vision.Decode()
     center_crop_op = vision.CenterCrop([height, width])
-    data1 = data1.map(input_columns=["image"], operations=decode_op)
-    data1 = data1.map(input_columns=["image"], operations=center_crop_op)
+    data1 = data1.map(operations=decode_op, input_columns=["image"])
+    data1 = data1.map(operations=center_crop_op, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@@ -95,7 +95,7 @@ def test_center_crop_comp(height=375, width=375, plot=False):
         py_vision.ToTensor()
     ]
     transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
-    data2 = data2.map(input_columns=["image"], operations=transform)
+    data2 = data2.map(operations=transform, input_columns=["image"])

     image_c_cropped = []
     image_py_cropped = []
@@ -126,11 +126,11 @@ def test_crop_grayscale(height=375, width=375):
     transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
     data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data1 = data1.map(input_columns=["image"], operations=transform)
+    data1 = data1.map(operations=transform, input_columns=["image"])

     # If input is grayscale, the output dimensions should be single channel
     crop_gray = vision.CenterCrop([height, width])
-    data1 = data1.map(input_columns=["image"], operations=crop_gray)
+    data1 = data1.map(operations=crop_gray, input_columns=["image"])

     for item1 in data1.create_dict_iterator(num_epochs=1):
         c_image = item1["image"]
diff --git a/tests/ut/python/dataset/test_concat.py b/tests/ut/python/dataset/test_concat.py
index 32c7a68b37..1feb04f125 100644
--- a/tests/ut/python/dataset/test_concat.py
+++ b/tests/ut/python/dataset/test_concat.py
@@ -121,7 +121,7 @@ def test_concat_05():
     data2 = ds.GeneratorDataset(generator_10, ["col1"])

     type_cast_op = C.TypeCast(mstype.float32)
-    data1 = data1.map(input_columns=["col1"], operations=type_cast_op)
+    data1 = data1.map(operations=type_cast_op, input_columns=["col1"])

     data3 = data1 + data2

@@ -319,8 +319,8 @@ def test_concat_14():
                    F.Resize((224, 224)),
                    F.ToTensor()])

-    data1 = data1.map(input_columns=["image"], operations=transforms1)
-    data2 = data2.map(input_columns=["image"], operations=transforms1)
+    data1 = data1.map(operations=transforms1, input_columns=["image"])
+    data2 = data2.map(operations=transforms1, input_columns=["image"])
     data3 = data1 + data2

     expected, output = [], []
diff --git a/tests/ut/python/dataset/test_concatenate_op.py b/tests/ut/python/dataset/test_concatenate_op.py
index b404a565c0..242147ed33 100644
--- a/tests/ut/python/dataset/test_concatenate_op.py
+++ b/tests/ut/python/dataset/test_concatenate_op.py
@@ -31,7 +31,7 @@ def test_concatenate_op_all():
     append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float)
     data = ds.GeneratorDataset(gen, column_names=["col"])
     concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)
-    data = data.map(input_columns=["col"], operations=concatenate_op)
+    data = data.map(operations=concatenate_op, input_columns=["col"])
     expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3, 11., 12.])
     for data_row in data:
@@ -45,7 +45,7 @@ def test_concatenate_op_none():
     data = ds.GeneratorDataset(gen, column_names=["col"])
     concatenate_op = data_trans.Concatenate()

-    data = data.map(input_columns=["col"], operations=concatenate_op)
+    data = data.map(operations=concatenate_op, input_columns=["col"])
     for data_row in data:
         np.testing.assert_array_equal(data_row[0], np.array([5., 6., 7., 8.], dtype=np.float))
@@ -59,7 +59,7 @@ def test_concatenate_op_string():
     data = ds.GeneratorDataset(gen, column_names=["col"])
     concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)

-    data = data.map(input_columns=["col"], operations=concatenate_op)
+    data = data.map(operations=concatenate_op, input_columns=["col"])
     expected = np.array(["dw", "df", "ss", "ad", "dwsdf", "df"], dtype='S')
     for data_row in data:
         np.testing.assert_array_equal(data_row[0], expected)
@@ -74,8 +74,8 @@ def test_concatenate_op_multi_input_string():

     concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor, append=append_tensor)

-    data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"],
-                    operations=concatenate_op)
+    data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
+                    output_columns=["out1"])
     expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S')
     for data_row in data:
         np.testing.assert_array_equal(data_row[0], expected)
@@ -89,8 +89,8 @@ def test_concatenate_op_multi_input_numeric():

     concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor)

-    data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"],
-                    operations=concatenate_op)
+    data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
+                    output_columns=["out1"])
     expected = np.array([3, 5, 1, 2, 3, 4])
     for data_row in data:
         np.testing.assert_array_equal(data_row[0], expected)
@@ -104,7 +104,7 @@ def test_concatenate_op_type_mismatch():
     data = ds.GeneratorDataset(gen, column_names=["col"])
     concatenate_op = data_trans.Concatenate(0, prepend_tensor)

-    data = data.map(input_columns=["col"], operations=concatenate_op)
+    data = data.map(operations=concatenate_op, input_columns=["col"])
     with pytest.raises(RuntimeError) as error_info:
         for _ in data:
             pass
@@ -119,7 +119,7 @@ def test_concatenate_op_type_mismatch2():
     data = ds.GeneratorDataset(gen, column_names=["col"])
     concatenate_op = data_trans.Concatenate(0, prepend_tensor)

-    data = data.map(input_columns=["col"], operations=concatenate_op)
+    data = data.map(operations=concatenate_op, input_columns=["col"])
     with pytest.raises(RuntimeError) as error_info:
         for _ in data:
             pass
@@ -134,7 +134,7 @@ def test_concatenate_op_incorrect_dim():
     concatenate_op = data_trans.Concatenate(0, prepend_tensor)

     data = ds.GeneratorDataset(gen, column_names=["col"])
-    data = data.map(input_columns=["col"], operations=concatenate_op)
+    data = data.map(operations=concatenate_op, input_columns=["col"])
     with pytest.raises(RuntimeError) as error_info:
         for _ in data:
             pass
@@ -155,7 +155,7 @@ def test_concatenate_op_negative_axis():
     append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float)
     data = ds.GeneratorDataset(gen, column_names=["col"])
     concatenate_op = data_trans.Concatenate(-1, prepend_tensor, append_tensor)
-    data = data.map(input_columns=["col"], operations=concatenate_op)
+    data = data.map(operations=concatenate_op, input_columns=["col"])
     expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3, 11., 12.])
     for data_row in data:
diff --git a/tests/ut/python/dataset/test_config.py b/tests/ut/python/dataset/test_config.py
index 0677ea5ca3..efec569f66 100644
--- a/tests/ut/python/dataset/test_config.py
+++ b/tests/ut/python/dataset/test_config.py
@@ -86,12 +86,12 @@ def test_pipeline():
     num_parallel_workers_original = ds.config.get_num_parallel_workers()

     data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
-    data1 = data1.map(input_columns=["image"], operations=[c_vision.Decode(True)])
+    data1 = data1.map(operations=[c_vision.Decode(True)], input_columns=["image"])
     ds.serialize(data1, "testpipeline.json")

     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, num_parallel_workers=num_parallel_workers_original,
                                shuffle=False)
-    data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode(True)])
+    data2 = data2.map(operations=[c_vision.Decode(True)], input_columns=["image"])
     ds.serialize(data2, "testpipeline2.json")

     # check that the generated output is different
@@ -131,14 +131,14 @@ def test_deterministic_run_fail():
     # outputs a deterministic series of numbers, e,g "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
     random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
     decode_op = c_vision.Decode()
-    data1 = data1.map(input_columns=["image"], operations=decode_op)
-    data1 = data1.map(input_columns=["image"], operations=random_crop_op)
+    data1 = data1.map(operations=decode_op, input_columns=["image"])
+    data1 = data1.map(operations=random_crop_op, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data2 = data2.map(input_columns=["image"], operations=decode_op)
+    data2 = data2.map(operations=decode_op, input_columns=["image"])
     # If seed is set up on constructor
-    data2 = data2.map(input_columns=["image"], operations=random_crop_op)
+    data2 = data2.map(operations=random_crop_op, input_columns=["image"])

     try:
         dataset_equal(data1, data2, 0)
@@ -171,16 +171,16 @@ def test_seed_undeterministic():
     # We get the seed when constructor is called
     random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
     decode_op = c_vision.Decode()
-    data1 = data1.map(input_columns=["image"], operations=decode_op)
-    data1 = data1.map(input_columns=["image"], operations=random_crop_op)
+    data1 = data1.map(operations=decode_op, input_columns=["image"])
+    data1 = data1.map(operations=random_crop_op, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data2 = data2.map(input_columns=["image"], operations=decode_op)
+    data2 = data2.map(operations=decode_op, input_columns=["image"])
     # Since seed is set up on constructor, so the two ops output deterministic sequence.
     # Assume the generated random sequence "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
     random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
-    data2 = data2.map(input_columns=["image"], operations=random_crop_op2)
+    data2 = data2.map(operations=random_crop_op2, input_columns=["image"])

     try:
         dataset_equal(data1, data2, 0)
     except Exception as e:
@@ -211,15 +211,15 @@ def test_seed_deterministic():
     # seed will be read in during constructor call
     random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
     decode_op = c_vision.Decode()
-    data1 = data1.map(input_columns=["image"], operations=decode_op)
-    data1 = data1.map(input_columns=["image"], operations=random_crop_op)
+    data1 = data1.map(operations=decode_op, input_columns=["image"])
+    data1 = data1.map(operations=random_crop_op, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data2 = data2.map(input_columns=["image"], operations=decode_op)
+    data2 = data2.map(operations=decode_op, input_columns=["image"])
     # If seed is set up on constructor, so the two ops output deterministic sequence
     random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
-    data2 = data2.map(input_columns=["image"], operations=random_crop_op2)
+    data2 = data2.map(operations=random_crop_op2, input_columns=["image"])

     dataset_equal(data1, data2, 0)

@@ -246,15 +246,15 @@ def test_deterministic_run_distribution():
     data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
     random_horizontal_flip_op = c_vision.RandomHorizontalFlip(0.1)
     decode_op = c_vision.Decode()
-    data1 = data1.map(input_columns=["image"], operations=decode_op)
-    data1 = data1.map(input_columns=["image"], operations=random_horizontal_flip_op)
+    data1 = data1.map(operations=decode_op, input_columns=["image"])
+    data1 = data1.map(operations=random_horizontal_flip_op, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data2 = data2.map(input_columns=["image"], operations=decode_op)
+    data2 = data2.map(operations=decode_op, input_columns=["image"])
     # If seed is set up on constructor, so the two ops output deterministic sequence
     random_horizontal_flip_op2 = c_vision.RandomHorizontalFlip(0.1)
-    data2 = data2.map(input_columns=["image"], operations=random_horizontal_flip_op2)
+    data2 = data2.map(operations=random_horizontal_flip_op2, input_columns=["image"])

     dataset_equal(data1, data2, 0)

@@ -285,7 +285,7 @@ def test_deterministic_python_seed():
         py_vision.ToTensor(),
     ]
     transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
-    data1 = data1.map(input_columns=["image"], operations=transform)
+    data1 = data1.map(operations=transform, input_columns=["image"])
     data1_output = []
     # config.set_seed() calls random.seed()
     for data_one in data1.create_dict_iterator(num_epochs=1):
@@ -293,7 +293,7 @@ def test_deterministic_python_seed():

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data2 = data2.map(input_columns=["image"], operations=transform)
+    data2 = data2.map(operations=transform, input_columns=["image"])
     # config.set_seed() calls random.seed(), resets seed for next dataset iterator
     ds.config.set_seed(0)

@@ -328,7 +328,7 @@ def test_deterministic_python_seed_multi_thread():
         py_vision.ToTensor(),
     ]
     transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
-    data1 = data1.map(input_columns=["image"], operations=transform, python_multiprocessing=True)
+    data1 = data1.map(operations=transform, input_columns=["image"], python_multiprocessing=True)
     data1_output = []
     # config.set_seed() calls random.seed()
     for data_one in data1.create_dict_iterator(num_epochs=1):
@@ -337,7 +337,7 @@ def test_deterministic_python_seed_multi_thread():
     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
     # If seed is set up on constructor
-    data2 = data2.map(input_columns=["image"], operations=transform, python_multiprocessing=True)
+    data2 = data2.map(operations=transform, input_columns=["image"], python_multiprocessing=True)
     # config.set_seed() calls random.seed()
     ds.config.set_seed(0)
diff --git a/tests/ut/python/dataset/test_cut_out.py b/tests/ut/python/dataset/test_cut_out.py
index 61b849b08d..c56c5c0f5a 100644
--- a/tests/ut/python/dataset/test_cut_out.py
+++ b/tests/ut/python/dataset/test_cut_out.py
@@ -30,6 +30,7 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"

 GENERATE_GOLDEN = False

+
 def test_cut_out_op(plot=False):
     """
     Test Cutout
@@ -45,7 +46,7 @@ def test_cut_out_op(plot=False):
         f.RandomErasing(value='random')
     ]
     transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
-    data1 = data1.map(input_columns=["image"], operations=transform_1)
+    data1 = data1.map(operations=transform_1, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@@ -57,7 +58,7 @@ def test_cut_out_op(plot=False):
         cut_out_op
     ]

-    data2 = data2.map(input_columns=["image"], operations=transforms_2)
+    data2 = data2.map(operations=transforms_2, input_columns=["image"])

     num_iter = 0
     for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
@@ -91,7 +92,7 @@ def test_cut_out_op_multicut(plot=False):
         f.ToTensor(),
     ]
     transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
-    data1 = data1.map(input_columns=["image"], operations=transform_1)
+    data1 = data1.map(operations=transform_1, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@@ -103,7 +104,7 @@ def test_cut_out_op_multicut(plot=False):
         cut_out_op
     ]

-    data2 = data2.map(input_columns=["image"], operations=transforms_2)
+    data2 = data2.map(operations=transforms_2, input_columns=["image"])

     num_iter = 0
     image_list_1, image_list_2 = [], []
@@ -136,8 +137,8 @@ def test_cut_out_md5():
     data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
     decode_op = c.Decode()
     cut_out_op = c.CutOut(100)
-    data1 = data1.map(input_columns=["image"], operations=decode_op)
-    data1 = data1.map(input_columns=["image"], operations=cut_out_op)
+    data1 = data1.map(operations=decode_op, input_columns=["image"])
+    data1 = data1.map(operations=cut_out_op, input_columns=["image"])

     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
     transforms = [
@@ -146,7 +147,7 @@ def test_cut_out_md5():
         f.Cutout(100)
     ]
     transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
-    data2 = data2.map(input_columns=["image"], operations=transform)
+    data2 = data2.map(operations=transform, input_columns=["image"])

     # Compare with expected md5 from images
     filename1 = "cut_out_01_c_result.npz"
@@ -174,7 +175,7 @@ def test_cut_out_comp(plot=False):
         f.Cutout(200)
     ]
     transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
-    data1 = data1.map(input_columns=["image"], operations=transform_1)
+    data1 = data1.map(operations=transform_1, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@@ -184,7 +185,7 @@ def test_cut_out_comp(plot=False):
         c.CutOut(200)
     ]

-    data2 = data2.map(input_columns=["image"], operations=transforms_2)
+    data2 = data2.map(operations=transforms_2, input_columns=["image"])

     num_iter = 0
     image_list_1, image_list_2 = [], []
diff --git a/tests/ut/python/dataset/test_cutmix_batch_op.py b/tests/ut/python/dataset/test_cutmix_batch_op.py
index 35e1a4be61..8fc6a78f52 100644
--- a/tests/ut/python/dataset/test_cutmix_batch_op.py
+++ b/tests/ut/python/dataset/test_cutmix_batch_op.py
@@ -51,12 +51,12 @@ def test_cutmix_batch_success1(plot=False):
     # CutMix Images
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
     hwc2chw_op = vision.HWC2CHW()
-    data1 = data1.map(input_columns=["image"], operations=hwc2chw_op)
+    data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW, 2.0, 0.5)
     data1 = data1.batch(5, drop_remainder=True)
-    data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

     images_cutmix = None
     for idx, (image, _) in enumerate(data1):
@@ -94,12 +94,12 @@ def test_cutmix_batch_success2(plot=False):
     # CutMix Images
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
-    rescale_op = vision.Rescale((1.0/255.0), 0.0)
-    data1 = data1.map(input_columns=["image"], operations=rescale_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
+    rescale_op = vision.Rescale((1.0 / 255.0), 0.0)
+    data1 = data1.map(operations=rescale_op, input_columns=["image"])
     cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
     data1 = data1.batch(5, drop_remainder=True)
-    data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

     images_cutmix = None
     for idx, (image, _) in enumerate(data1):
@@ -125,7 +125,7 @@ def test_cutmix_batch_success3(plot=False):
     ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
     decode_op = vision.Decode()
-    ds_original = ds_original.map(input_columns=["image"], operations=[decode_op])
+    ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
     ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)

     images_original = None
@@ -139,14 +139,14 @@ def test_cutmix_batch_success3(plot=False):
     data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
     decode_op = vision.Decode()
-    data1 = data1.map(input_columns=["image"], operations=[decode_op])
+    data1 = data1.map(operations=[decode_op], input_columns=["image"])

     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])

     cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
     data1 = data1.batch(4, pad_info={}, drop_remainder=True)
-    data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

     images_cutmix = None
     for idx, (image, _) in enumerate(data1):
@@ -172,7 +172,7 @@ def test_cutmix_batch_success4(plot=False):
     ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
     decode_op = vision.Decode()
-    ds_original = ds_original.map(input_columns=["image"], operations=[decode_op])
+    ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
     ds_original = ds_original.batch(2, drop_remainder=True)

     images_original = None
@@ -186,14 +186,14 @@ def test_cutmix_batch_success4(plot=False):
     data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False)
     decode_op = vision.Decode()
-    data1 = data1.map(input_columns=["image"], operations=[decode_op])
+    data1 = data1.map(operations=[decode_op], input_columns=["image"])

     one_hot_op = data_trans.OneHot(num_classes=100)
-    data1 = data1.map(input_columns=["attr"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["attr"])

     cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.5, 0.9)
     data1 = data1.batch(2, drop_remainder=True)
-    data1 = data1.map(input_columns=["image", "attr"], operations=cutmix_batch_op)
+    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "attr"])

     images_cutmix = None
     for idx, (image, _) in enumerate(data1):
@@ -223,10 +223,10 @@ def test_cutmix_batch_nhwc_md5():
     data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
     one_hot_op = data_trans.OneHot(num_classes=10)
-    data = data.map(input_columns=["label"], operations=one_hot_op)
+    data = data.map(operations=one_hot_op, input_columns=["label"])
     cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
     data = data.batch(5, drop_remainder=True)
-    data = data.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+    data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"])

     filename = "cutmix_batch_c_nhwc_result.npz"
     save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@@ -247,12 +247,12 @@ def test_cutmix_batch_nchw_md5():
     # CutMixBatch Images
     data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
     hwc2chw_op = vision.HWC2CHW()
-    data = data.map(input_columns=["image"], operations=hwc2chw_op)
+    data = data.map(operations=hwc2chw_op, input_columns=["image"])
     one_hot_op = data_trans.OneHot(num_classes=10)
-    data = data.map(input_columns=["label"], operations=one_hot_op)
+    data = data.map(operations=one_hot_op, input_columns=["label"])
     cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
     data = data.batch(5, drop_remainder=True)
-    data = data.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+    data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"])

     filename = "cutmix_batch_c_nchw_result.npz"
     save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@@ -273,10 +273,10 @@ def test_cutmix_batch_fail1():
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
     with pytest.raises(RuntimeError) as error:
-        data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+        data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
         for idx, (image, _) in enumerate(data1):
             if idx == 0:
                 images_cutmix = image
@@ -297,7 +297,7 @@ def test_cutmix_batch_fail2():
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     with pytest.raises(ValueError) as error:
         vision.CutMixBatch(mode.ImageBatchFormat.NHWC, -1)
     error_message = "Input is not within the required interval"
@@ -315,7 +315,7 @@ def test_cutmix_batch_fail3():
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     with pytest.raises(ValueError) as error:
         vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, 2)
     error_message = "Input is not within the required interval"
@@ -333,7 +333,7 @@ def test_cutmix_batch_fail4():
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     with pytest.raises(ValueError) as error:
         vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, -1)
     error_message = "Input is not within the required interval"
@@ -351,10 +351,10 @@ def test_cutmix_batch_fail5():
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
     data1 = data1.batch(5, drop_remainder=True)
-    data1 = data1.map(input_columns=["image"], operations=cutmix_batch_op)
+    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image"])

     with pytest.raises(RuntimeError) as error:
         images_cutmix = np.array([])
@@ -378,10 +378,10 @@ def test_cutmix_batch_fail6():
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
     data1 = data1.batch(5, drop_remainder=True)
-    data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

     with pytest.raises(RuntimeError) as error:
         images_cutmix = np.array([])
@@ -406,7 +406,7 @@ def test_cutmix_batch_fail7():
     cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
     data1 = data1.batch(5, drop_remainder=True)
-    data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

     with pytest.raises(RuntimeError) as error:
         images_cutmix = np.array([])
@@ -430,7 +430,7 @@ def test_cutmix_batch_fail8():
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     with pytest.raises(ValueError) as error:
         vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.0)
     error_message = "Input is not within the required interval"
error_message = "Input is not within the required interval" diff --git a/tests/ut/python/dataset/test_dataset_numpy_slices.py b/tests/ut/python/dataset/test_dataset_numpy_slices.py index 83fee94346..861778c654 100644 --- a/tests/ut/python/dataset/test_dataset_numpy_slices.py +++ b/tests/ut/python/dataset/test_dataset_numpy_slices.py @@ -59,7 +59,7 @@ def test_numpy_slices_list_append(): data1 = de.TFRecordDataset(DATA_DIR) resize_op = vision.Resize((resize_height, resize_width)) - data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True), resize_op]) + data1 = data1.map(operations=[vision.Decode(True), resize_op], input_columns=["image"]) res = [] for data in data1.create_dict_iterator(num_epochs=1): diff --git a/tests/ut/python/dataset/test_datasets_celeba.py b/tests/ut/python/dataset/test_datasets_celeba.py index 44ef32c7c7..889e18cec3 100644 --- a/tests/ut/python/dataset/test_datasets_celeba.py +++ b/tests/ut/python/dataset/test_datasets_celeba.py @@ -46,8 +46,8 @@ def test_celeba_dataset_op(): data = data.repeat(2) center_crop = vision.CenterCrop(crop_size) resize_op = vision.Resize(resize_size, Inter.LINEAR) # Bilinear mode - data = data.map(input_columns=["image"], operations=center_crop) - data = data.map(input_columns=["image"], operations=resize_op) + data = data.map(operations=center_crop, input_columns=["image"]) + data = data.map(operations=resize_op, input_columns=["image"]) count = 0 for item in data.create_dict_iterator(num_epochs=1): diff --git a/tests/ut/python/dataset/test_datasets_coco.py b/tests/ut/python/dataset/test_datasets_coco.py index 39609092bf..f91a1f2339 100644 --- a/tests/ut/python/dataset/test_datasets_coco.py +++ b/tests/ut/python/dataset/test_datasets_coco.py @@ -25,6 +25,7 @@ INVALID_FILE = "../data/dataset/testCOCO/annotations/invalid.json" LACKOFIMAGE_FILE = "../data/dataset/testCOCO/annotations/lack_of_images.json" INVALID_CATEGORY_ID_FILE = "../data/dataset/testCOCO/annotations/invalid_category_id.json" + def test_coco_detection(): data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True, shuffle=False) @@ -57,6 +58,7 @@ def test_coco_detection(): np.testing.assert_array_equal(np.array([[5]]), category_id[4]) np.testing.assert_array_equal(np.array([[6]]), category_id[5]) + def test_coco_stuff(): data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff", decode=True, shuffle=False) @@ -97,6 +99,7 @@ def test_coco_stuff(): segmentation[5]) np.testing.assert_array_equal(np.array([[0]]), iscrowd[5]) + def test_coco_keypoint(): data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint", decode=True, shuffle=False) @@ -124,6 +127,7 @@ def test_coco_keypoint(): keypoints[1]) np.testing.assert_array_equal(np.array([[10]]), num_keypoints[1]) + def test_coco_panoptic(): data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True, shuffle=False) num_iter = 0 @@ -151,6 +155,7 @@ def test_coco_panoptic(): np.testing.assert_array_equal(np.array([[0], [0]]), iscrowd[1]) np.testing.assert_array_equal(np.array([[43102], [6079]]), area[1]) + def test_coco_detection_classindex(): data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) class_index = data1.get_class_indexing() @@ -161,6 +166,7 @@ def test_coco_detection_classindex(): num_iter += 1 assert num_iter == 6 + def test_coco_panootic_classindex(): data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True) 
class_index = data1.get_class_indexing() @@ -170,6 +176,7 @@ def test_coco_panootic_classindex(): num_iter += 1 assert num_iter == 2 + def test_coco_case_0(): data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) data1 = data1.shuffle(10) @@ -179,6 +186,7 @@ def test_coco_case_0(): num_iter += 1 assert num_iter == 2 + def test_coco_case_1(): data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) sizes = [0.5, 0.5] @@ -194,28 +202,31 @@ def test_coco_case_1(): num_iter += 1 assert num_iter == 3 + def test_coco_case_2(): data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) resize_op = vision.Resize((224, 224)) - data1 = data1.map(input_columns=["image"], operations=resize_op) + data1 = data1.map(operations=resize_op, input_columns=["image"]) data1 = data1.repeat(4) num_iter = 0 for _ in data1.__iter__(): num_iter += 1 assert num_iter == 24 + def test_coco_case_3(): data1 = ds.CocoDataset(DATA_DIR_2, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) resize_op = vision.Resize((224, 224)) - data1 = data1.map(input_columns=["image"], operations=resize_op) + data1 = data1.map(operations=resize_op, input_columns=["image"]) data1 = data1.repeat(4) num_iter = 0 for _ in data1.__iter__(): num_iter += 1 assert num_iter == 24 + def test_coco_case_exception(): try: data1 = ds.CocoDataset("path_not_exist/", annotation_file=ANNOTATION_FILE, task="Detection") diff --git a/tests/ut/python/dataset/test_datasets_generator.py b/tests/ut/python/dataset/test_datasets_generator.py index a48875973a..ed2543094a 100644 --- a/tests/ut/python/dataset/test_datasets_generator.py +++ b/tests/ut/python/dataset/test_datasets_generator.py @@ -25,6 +25,7 @@ def generator_1d(): for i in range(64): yield (np.array([i]),) + class DatasetGenerator: def __init__(self): pass @@ -241,11 +242,11 @@ def test_generator_8(): # apply dataset operations data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) - data1 = data1.map(input_columns="col0", output_columns="out0", operations=(lambda x: x * 3), + data1 = data1.map(operations=(lambda x: x * 3), input_columns="col0", output_columns="out0", num_parallel_workers=2) - data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x * 7, x)), + data1 = data1.map(operations=(lambda x: (x * 7, x)), input_columns="col1", output_columns=["out1", "out2"], num_parallel_workers=2, column_order=["out0", "out1", "out2"]) - data1 = data1.map(input_columns="out2", output_columns="out2", operations=(lambda x: x + 1), + data1 = data1.map(operations=(lambda x: x + 1), input_columns="out2", output_columns="out2", num_parallel_workers=2) i = 0 @@ -268,9 +269,9 @@ def test_generator_9(): # apply dataset operations data1 = ds.GeneratorDataset(generator_mc(2048), ["image", "label"]) data2 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"]) - data1 = data1.map(input_columns="label", operations=(lambda x: x * 3), + data1 = data1.map(operations=(lambda x: x * 3), input_columns="label", num_parallel_workers=4) - data2 = data2.map(input_columns="label", operations=(lambda x: x * 3), + data2 = data2.map(operations=(lambda x: x * 3), input_columns="label", num_parallel_workers=4) # Expected column order is not changed. 
@@ -298,7 +299,7 @@ def test_generator_10(): # apply dataset operations data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) - data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)), + data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"], column_order=['col0', 'out1', 'out2'], num_parallel_workers=2) # Expected column order is |col0|out1|out2| @@ -322,7 +323,7 @@ def test_generator_11(): # apply dataset operations data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) - data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)), + data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"], column_order=['out1', 'out2'], num_parallel_workers=2) # Expected column order is |out1|out2| @@ -503,7 +504,7 @@ def test_generator_error_3(): with pytest.raises(ValueError) as info: # apply dataset operations data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"]) - data1 = data1.map(input_columns=["label"], output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)), + data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"], output_columns=["out1", "out2"], num_parallel_workers=2) for _ in data1: @@ -515,7 +516,7 @@ def test_generator_error_4(): with pytest.raises(RuntimeError) as info: # apply dataset operations data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"]) - data1 = data1.map(input_columns=["label"], operations=(lambda x: (x, x * 5)), + data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"], num_parallel_workers=2) for _ in data1: @@ -706,6 +707,7 @@ def test_generator_dataset_size_4(): num_rows = num_rows + 1 assert data_size == num_rows + def test_generator_dataset_size_5(): """ Test get_dataset_size after create_dict_iterator diff --git a/tests/ut/python/dataset/test_datasets_manifestop.py b/tests/ut/python/dataset/test_datasets_manifestop.py index e28e4b4ab7..d8abcf9c9a 100644 --- a/tests/ut/python/dataset/test_datasets_manifestop.py +++ b/tests/ut/python/dataset/test_datasets_manifestop.py @@ -103,8 +103,8 @@ def test_manifest_dataset_multi_label_onehot(): data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False) expect_label = [[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 1]]] one_hot_encode = data_trans.OneHot(3) - data = data.map(input_columns=["label"], operations=one_hot_encode) - data = data.map(input_columns=["label"], operations=multi_label_hot) + data = data.map(operations=one_hot_encode, input_columns=["label"]) + data = data.map(operations=multi_label_hot, input_columns=["label"]) data = data.batch(2) count = 0 for item in data.create_dict_iterator(num_epochs=1): diff --git a/tests/ut/python/dataset/test_datasets_voc.py b/tests/ut/python/dataset/test_datasets_voc.py index 1636657407..972a1ce001 100644 --- a/tests/ut/python/dataset/test_datasets_voc.py +++ b/tests/ut/python/dataset/test_datasets_voc.py @@ -85,8 +85,8 @@ def test_case_0(): resize_op = vision.Resize((224, 224)) - data1 = data1.map(input_columns=["image"], operations=resize_op) - data1 = data1.map(input_columns=["target"], operations=resize_op) + data1 = data1.map(operations=resize_op, input_columns=["image"]) + data1 = data1.map(operations=resize_op, input_columns=["target"]) repeat_num = 4 data1 = data1.repeat(repeat_num) batch_size = 2 @@ -103,7 +103,7 @@ def test_case_1(): resize_op = vision.Resize((224, 
224)) - data1 = data1.map(input_columns=["image"], operations=resize_op) + data1 = data1.map(operations=resize_op, input_columns=["image"]) repeat_num = 4 data1 = data1.repeat(repeat_num) batch_size = 2 diff --git a/tests/ut/python/dataset/test_decode.py b/tests/ut/python/dataset/test_decode.py index 6040446ffe..266681e2ca 100644 --- a/tests/ut/python/dataset/test_decode.py +++ b/tests/ut/python/dataset/test_decode.py @@ -36,7 +36,7 @@ def test_decode_op(): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) # Serialize and Load dataset requires using vision.Decode instead of vision.Decode(). - data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)]) + data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -57,7 +57,7 @@ def test_decode_op_tf_file_dataset(): # Decode with rgb format set to True data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.FILES) - data1 = data1.map(input_columns=["image"], operations=vision.Decode(True)) + data1 = data1.map(operations=vision.Decode(True), input_columns=["image"]) for item in data1.create_dict_iterator(num_epochs=1): logger.info('decode == {}'.format(item['image'])) diff --git a/tests/ut/python/dataset/test_deviceop_cpu.py b/tests/ut/python/dataset/test_deviceop_cpu.py index 5d0d320704..89c8791d27 100644 --- a/tests/ut/python/dataset/test_deviceop_cpu.py +++ b/tests/ut/python/dataset/test_deviceop_cpu.py @@ -54,8 +54,8 @@ def test_case_1(): resize_op = vision.Resize((resize_height, resize_width)) # apply map operations on images - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=resize_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=resize_op, input_columns=["image"]) batch_size = 3 data = data.batch(batch_size, drop_remainder=True) @@ -79,8 +79,8 @@ def test_case_2(): resize_op = vision.Resize((resize_height, resize_width)) # apply map operations on images - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=resize_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=resize_op, input_columns=["image"]) batch_size = 2 data = data.batch(batch_size, drop_remainder=True) @@ -107,8 +107,8 @@ def test_case_3(): resize_op = vision.Resize((resize_height, resize_width)) # apply map operations on images - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=resize_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=resize_op, input_columns=["image"]) data = data.repeat(2) diff --git a/tests/ut/python/dataset/test_duplicate_op.py b/tests/ut/python/dataset/test_duplicate_op.py index d0db32dba8..c9ab323691 100644 --- a/tests/ut/python/dataset/test_duplicate_op.py +++ b/tests/ut/python/dataset/test_duplicate_op.py @@ -24,8 +24,8 @@ import mindspore.dataset.transforms.c_transforms as ops def compare(array): data = ds.NumpySlicesDataset([array], column_names="x") array = np.array(array) - data = data.map(input_columns=["x"], output_columns=["x", "y"], column_order=["x", "y"], - operations=ops.Duplicate()) + data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"], + 
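Note: in the Duplicate hunk just above, one input column fans out into two output columns, which is the case where output_columns and column_order still have to be spelled out alongside operations. A small sketch of the reordered call, assuming the ops alias imported in that test and illustrative column names and data:

    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as ops

    data = ds.NumpySlicesDataset([[1, 2, 3]], column_names="x")
    # One input column becomes two output columns, so both output_columns
    # and column_order are required in addition to operations.
    data = data.map(operations=ops.Duplicate(), input_columns=["x"],
                    output_columns=["x", "y"], column_order=["x", "y"])
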
column_order=["x", "y"]) for d in data.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(array, d["x"]) np.testing.assert_array_equal(array, d["y"]) diff --git a/tests/ut/python/dataset/test_epoch_ctrl.py b/tests/ut/python/dataset/test_epoch_ctrl.py index 3a00cb5554..ac655df9d2 100644 --- a/tests/ut/python/dataset/test_epoch_ctrl.py +++ b/tests/ut/python/dataset/test_epoch_ctrl.py @@ -79,7 +79,7 @@ def test_decode_op(): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) # Serialize and Load dataset requires using vision.Decode instead of vision.Decode(). - data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)]) + data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) diff --git a/tests/ut/python/dataset/test_equalize.py b/tests/ut/python/dataset/test_equalize.py index 7d4b2590d5..c67552a541 100644 --- a/tests/ut/python/dataset/test_equalize.py +++ b/tests/ut/python/dataset/test_equalize.py @@ -43,8 +43,7 @@ def test_equalize_py(plot=False): F.Resize((224, 224)), F.ToTensor()]) - ds_original = ds.map(input_columns="image", - operations=transforms_original) + ds_original = ds.map(operations=transforms_original, input_columns="image") ds_original = ds_original.batch(512) @@ -64,8 +63,7 @@ def test_equalize_py(plot=False): F.Equalize(), F.ToTensor()]) - ds_equalize = ds.map(input_columns="image", - operations=transforms_equalize) + ds_equalize = ds.map(operations=transforms_equalize, input_columns="image") ds_equalize = ds_equalize.batch(512) @@ -98,8 +96,7 @@ def test_equalize_c(plot=False): transforms_original = [C.Decode(), C.Resize(size=[224, 224])] - ds_original = ds.map(input_columns="image", - operations=transforms_original) + ds_original = ds.map(operations=transforms_original, input_columns="image") ds_original = ds_original.batch(512) @@ -117,8 +114,7 @@ def test_equalize_c(plot=False): transform_equalize = [C.Decode(), C.Resize(size=[224, 224]), C.Equalize()] - ds_equalize = ds.map(input_columns="image", - operations=transform_equalize) + ds_equalize = ds.map(operations=transform_equalize, input_columns="image") ds_equalize = ds_equalize.batch(512) @@ -147,11 +143,9 @@ def test_equalize_py_c(plot=False): # equalize Images in cpp ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), C.Resize((224, 224))]) + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) - ds_c_equalize = ds.map(input_columns="image", - operations=C.Equalize()) + ds_c_equalize = ds.map(operations=C.Equalize(), input_columns="image") ds_c_equalize = ds_c_equalize.batch(512) @@ -165,16 +159,14 @@ def test_equalize_py_c(plot=False): # Equalize images in python ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), C.Resize((224, 224))]) + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8), F.ToPIL(), F.Equalize(), np.array]) - ds_p_equalize = ds.map(input_columns="image", - operations=transforms_p_equalize) + ds_p_equalize = ds.map(operations=transforms_p_equalize, input_columns="image") ds_p_equalize = ds_p_equalize.batch(512) @@ -206,13 +198,10 @@ def test_equalize_one_channel(): try: ds = 
de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((224, 224)), - lambda img: np.array(img[:, :, 0])]) + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), + lambda img: np.array(img[:, :, 0])], input_columns=["image"]) - ds.map(input_columns="image", - operations=c_op) + ds.map(operations=c_op, input_columns="image") except RuntimeError as e: logger.info("Got an exception in DE: {}".format(str(e))) @@ -225,8 +214,7 @@ def test_equalize_mnist_c(plot=False): """ logger.info("Test Equalize C Op With MNIST Images") ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) - ds_equalize_c = ds.map(input_columns="image", - operations=C.Equalize()) + ds_equalize_c = ds.map(operations=C.Equalize(), input_columns="image") ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) images = [] @@ -259,7 +247,7 @@ def test_equalize_md5_py(): F.Equalize(), F.ToTensor()]) - data1 = data1.map(input_columns="image", operations=transforms) + data1 = data1.map(operations=transforms, input_columns="image") # Compare with expected md5 from images filename = "equalize_01_result.npz" save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN) @@ -279,7 +267,7 @@ def test_equalize_md5_c(): C.Equalize(), F.ToTensor()] - data = ds.map(input_columns="image", operations=transforms_equalize) + data = ds.map(operations=transforms_equalize, input_columns="image") # Compare with expected md5 from images filename = "equalize_01_result_c.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) diff --git a/tests/ut/python/dataset/test_exceptions.py b/tests/ut/python/dataset/test_exceptions.py index 7b89ad3b43..6f9d156174 100644 --- a/tests/ut/python/dataset/test_exceptions.py +++ b/tests/ut/python/dataset/test_exceptions.py @@ -29,7 +29,7 @@ def test_exception_01(): logger.info("test_exception_01") data = ds.TFRecordDataset(DATA_DIR, columns_list=["image"]) with pytest.raises(TypeError) as info: - data.map(input_columns=["image"], operations=vision.Resize(100, 100)) + data.map(operations=vision.Resize(100, 100), input_columns=["image"]) assert "Argument interpolation with value 100 is not of type (,)" in str(info.value) @@ -45,8 +45,8 @@ def test_exception_02(): num_samples = 1 data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples) - data = data.map(input_columns=["image"], operations=vision.Decode()) - data = data.map(input_columns=["image"], operations=vision.Resize((100, 100))) + data = data.map(operations=vision.Decode(), input_columns=["image"]) + data = data.map(operations=vision.Resize((100, 100)), input_columns=["image"]) # Confirm 1 sample in dataset assert sum([1 for _ in data]) == 1 num_iters = 0 diff --git a/tests/ut/python/dataset/test_fill_op.py b/tests/ut/python/dataset/test_fill_op.py index 657a529723..025b65840b 100644 --- a/tests/ut/python/dataset/test_fill_op.py +++ b/tests/ut/python/dataset/test_fill_op.py @@ -28,7 +28,7 @@ def test_fillop_basic(): data = ds.GeneratorDataset(gen, column_names=["col"]) fill_op = data_trans.Fill(3) - data = data.map(input_columns=["col"], operations=fill_op) + data = data.map(operations=fill_op, input_columns=["col"]) expected = np.array([3, 3, 3, 3], dtype=np.uint8) for data_row in data: np.testing.assert_array_equal(data_row[0], expected) @@ -41,7 +41,7 @@ def test_fillop_down_type_cast(): data = ds.GeneratorDataset(gen, column_names=["col"]) fill_op = 
data_trans.Fill(-3) - data = data.map(input_columns=["col"], operations=fill_op) + data = data.map(operations=fill_op, input_columns=["col"]) expected = np.array([253, 253, 253, 253], dtype=np.uint8) for data_row in data: np.testing.assert_array_equal(data_row[0], expected) @@ -54,7 +54,7 @@ def test_fillop_up_type_cast(): data = ds.GeneratorDataset(gen, column_names=["col"]) fill_op = data_trans.Fill(3) - data = data.map(input_columns=["col"], operations=fill_op) + data = data.map(operations=fill_op, input_columns=["col"]) expected = np.array([3., 3., 3., 3.], dtype=np.float) for data_row in data: np.testing.assert_array_equal(data_row[0], expected) @@ -67,7 +67,7 @@ def test_fillop_string(): data = ds.GeneratorDataset(gen, column_names=["col"]) fill_op = data_trans.Fill("error") - data = data.map(input_columns=["col"], operations=fill_op) + data = data.map(operations=fill_op, input_columns=["col"]) expected = np.array(['error', 'error'], dtype='S') for data_row in data: np.testing.assert_array_equal(data_row[0], expected) @@ -79,7 +79,7 @@ def test_fillop_error_handling(): data = ds.GeneratorDataset(gen, column_names=["col"]) fill_op = data_trans.Fill("words") - data = data.map(input_columns=["col"], operations=fill_op) + data = data.map(operations=fill_op, input_columns=["col"]) with pytest.raises(RuntimeError) as error_info: for _ in data: diff --git a/tests/ut/python/dataset/test_filterop.py b/tests/ut/python/dataset/test_filterop.py index 2a7381b9a0..7d6cc3ae8f 100644 --- a/tests/ut/python/dataset/test_filterop.py +++ b/tests/ut/python/dataset/test_filterop.py @@ -30,7 +30,7 @@ def test_diff_predicate_func(): cde.Resize([64, 64]) ] dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image", "label"], shuffle=False) - dataset = dataset.map(input_columns=["image"], operations=transforms, num_parallel_workers=1) + dataset = dataset.map(operations=transforms, input_columns=["image"], num_parallel_workers=1) dataset = dataset.filter(input_columns=["image", "label"], predicate=predicate_func, num_parallel_workers=4) num_iter = 0 @@ -261,8 +261,8 @@ def func_map_part(data_col1): # test with map def test_filter_by_generator_with_map_all_col(): dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"]) - dataset_map = dataset.map(input_columns=["col1"], output_columns=["col1"], operations=func_map_part) - # dataset_map = dataset.map( operations=func_map_part) + dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["col1"]) + # dataset_map = dataset.map(operations=func_map_part) dataset_f = dataset_map.filter(input_columns=["col1"], predicate=filter_func_map_part, num_parallel_workers=1) num_iter = 0 ret_data = [] @@ -277,7 +277,7 @@ def test_filter_by_generator_with_map_all_col(): # test with map def test_filter_by_generator_with_map_part_col(): dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"]) - dataset_map = dataset.map(input_columns=["col1"], output_columns=["out1"], operations=func_map_part) + dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"]) dataset_f = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_map, num_parallel_workers=4) num_iter = 0 @@ -328,7 +328,7 @@ def filter_func_input_column3(col1): # test with input_columns def test_filter_by_generator_with_input_column(): dataset = ds.GeneratorDataset(generator_mc(64), ["col1", "col2"]) - dataset_map = dataset.map(input_columns=["col1"], output_columns=["out1"], operations=func_map_part) + 
dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"]) dataset_f1 = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_input_column1, num_parallel_workers=4) dataset_f2 = dataset_f1.filter(input_columns=["out1"], predicate=filter_func_input_column2, num_parallel_workers=4) @@ -382,7 +382,7 @@ def test_filter_by_generator_Partial1(): dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"]) dataset_zip = ds.zip((dataset1, dataset2)) dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2) - dataset_map = dataset_f1.map(input_columns=["col1"], output_columns=["out1"], operations=lambda x1: x1 + 400) + dataset_map = dataset_f1.map(operations=lambda x1: x1 + 400, input_columns=["col1"], output_columns=["out1"]) ret = [] for item in dataset_map.create_dict_iterator(num_epochs=1): ret.append(item["out1"]) @@ -399,8 +399,8 @@ def test_filter_by_generator_Partial2(): dataset2f = dataset2.filter(input_columns=["col3"], predicate=lambda x: x not in [203, 207, 209], num_parallel_workers=2) dataset_zip = ds.zip((dataset1f, dataset2f)) - dataset_map = dataset_zip.map(input_columns=["col1", "col3"], output_columns=["out1", "out3"], - operations=lambda x1, x3: (x1 + 400, x3 + 500)) + dataset_map = dataset_zip.map(operations=lambda x1, x3: (x1 + 400, x3 + 500), input_columns=["col1", "col3"], + output_columns=["out1", "out3"]) ret1 = [] ret3 = [] for item in dataset_map.create_dict_iterator(num_epochs=1): @@ -484,6 +484,7 @@ def test_filter_by_generator_with_map_all_sort(): assert ret_data[0]["col1"] == 0 assert ret_data[9]["col6"] == 509 + def test_filter_by_generator_get_dataset_size(): dataset = ds.GeneratorDataset(generator_1d, ["data"]) dataset = dataset.filter(predicate=filter_func_shuffle_after, num_parallel_workers=4) diff --git a/tests/ut/python/dataset/test_five_crop.py b/tests/ut/python/dataset/test_five_crop.py index c119368540..601426a052 100644 --- a/tests/ut/python/dataset/test_five_crop.py +++ b/tests/ut/python/dataset/test_five_crop.py @@ -41,7 +41,7 @@ def test_five_crop_op(plot=False): vision.ToTensor(), ] transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) - data1 = data1.map(input_columns=["image"], operations=transform_1) + data1 = data1.map(operations=transform_1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -51,7 +51,7 @@ def test_five_crop_op(plot=False): lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images ] transform_2 = mindspore.dataset.transforms.py_transforms.Compose(transforms_2) - data2 = data2.map(input_columns=["image"], operations=transform_2) + data2 = data2.map(operations=transform_2, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -85,7 +85,7 @@ def test_five_crop_error_msg(): vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) with pytest.raises(RuntimeError) as info: for _ in data: @@ -110,7 +110,7 @@ def test_five_crop_md5(): lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = 
data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) # Compare with expected md5 from images filename = "five_crop_01_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) diff --git a/tests/ut/python/dataset/test_from_dataset.py b/tests/ut/python/dataset/test_from_dataset.py index e285f1fc21..95c4b32daa 100644 --- a/tests/ut/python/dataset/test_from_dataset.py +++ b/tests/ut/python/dataset/test_from_dataset.py @@ -26,7 +26,7 @@ def test_demo_basic_from_dataset(): vocab = text.Vocab.from_dataset(data, "text", freq_range=None, top_k=None, special_tokens=["<pad>", "<unk>"], special_first=True) - data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "<unk>")) + data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"]) res = [] for d in data.create_dict_iterator(num_epochs=1): res.append(d["text"].item()) @@ -36,10 +36,10 @@ def test_demo_basic_from_dataset_with_tokenizer(): """ this is a tutorial on how from_dataset should be used in a normal use case with tokenizer""" data = ds.TextFileDataset("../data/dataset/testTokenizerData/1.txt", shuffle=False) - data = data.map(input_columns=["text"], operations=text.UnicodeCharTokenizer()) + data = data.map(operations=text.UnicodeCharTokenizer(), input_columns=["text"]) vocab = text.Vocab.from_dataset(data, None, freq_range=None, top_k=None, special_tokens=["<pad>", "<unk>"], special_first=True) - data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "<unk>")) + data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"]) res = [] for d in data.create_dict_iterator(num_epochs=1): res.append(list(d["text"])) @@ -60,7 +60,7 @@ def test_from_dataset(): corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"]) vocab = text.Vocab.from_dataset(corpus_dataset, None, freq_range, top_k, special_tokens=["<pad>", "<unk>"], special_first=True) - corpus_dataset = corpus_dataset.map(input_columns="text", operations=text.Lookup(vocab, "<unk>")) + corpus_dataset = corpus_dataset.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text") res = [] for d in corpus_dataset.create_dict_iterator(num_epochs=1): res.append(list(d["text"])) @@ -108,7 +108,7 @@ def test_from_dataset_special_token(): corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"]) vocab = text.Vocab.from_dataset(corpus_dataset, None, None, top_k, special_tokens, special_first) data = ds.GeneratorDataset(gen_input(texts), column_names=["text"]) - data = data.map(input_columns="text", operations=text.Lookup(vocab, "<unk>")) + data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text") res = [] for d in data.create_dict_iterator(num_epochs=1): res.append(d["text"].item()) diff --git a/tests/ut/python/dataset/test_get_col_names.py b/tests/ut/python/dataset/test_get_col_names.py index 9ef20b50f5..5f665b983f 100644 --- a/tests/ut/python/dataset/test_get_col_names.py +++ b/tests/ut/python/dataset/test_get_col_names.py @@ -95,16 +95,16 @@ def test_get_column_name_manifest(): def test_get_column_name_map(): data = ds.Cifar10Dataset(CIFAR10_DIR) center_crop_op = vision.CenterCrop(10) - data = data.map(input_columns=["image"], operations=center_crop_op) + data = data.map(operations=center_crop_op, input_columns=["image"]) assert data.get_col_names() == ["image", "label"] data = ds.Cifar10Dataset(CIFAR10_DIR) - data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["image"]) + data =
data.map(operations=center_crop_op, input_columns=["image"], output_columns=["image"]) assert data.get_col_names() == ["image", "label"] data = ds.Cifar10Dataset(CIFAR10_DIR) - data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1"]) + data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1"]) assert data.get_col_names() == ["col1", "label"] data = ds.Cifar10Dataset(CIFAR10_DIR) - data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1", "col2"], + data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1", "col2"], column_order=["col2", "col1"]) assert data.get_col_names() == ["col2", "col1"] diff --git a/tests/ut/python/dataset/test_invert.py b/tests/ut/python/dataset/test_invert.py index 3ffefc5d9d..7ec5c79e49 100644 --- a/tests/ut/python/dataset/test_invert.py +++ b/tests/ut/python/dataset/test_invert.py @@ -42,8 +42,7 @@ def test_invert_py(plot=False): F.Resize((224, 224)), F.ToTensor()]) - ds_original = ds.map(input_columns="image", - operations=transforms_original) + ds_original = ds.map(operations=transforms_original, input_columns="image") ds_original = ds_original.batch(512) @@ -63,8 +62,7 @@ def test_invert_py(plot=False): F.Invert(), F.ToTensor()]) - ds_invert = ds.map(input_columns="image", - operations=transforms_invert) + ds_invert = ds.map(operations=transforms_invert, input_columns="image") ds_invert = ds_invert.batch(512) @@ -97,8 +95,7 @@ def test_invert_c(plot=False): transforms_original = [C.Decode(), C.Resize(size=[224, 224])] - ds_original = ds.map(input_columns="image", - operations=transforms_original) + ds_original = ds.map(operations=transforms_original, input_columns="image") ds_original = ds_original.batch(512) @@ -116,8 +113,7 @@ def test_invert_c(plot=False): transform_invert = [C.Decode(), C.Resize(size=[224, 224]), C.Invert()] - ds_invert = ds.map(input_columns="image", - operations=transform_invert) + ds_invert = ds.map(operations=transform_invert, input_columns="image") ds_invert = ds_invert.batch(512) @@ -146,11 +142,9 @@ def test_invert_py_c(plot=False): # Invert Images in cpp ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), C.Resize((224, 224))]) + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) - ds_c_invert = ds.map(input_columns="image", - operations=C.Invert()) + ds_c_invert = ds.map(operations=C.Invert(), input_columns="image") ds_c_invert = ds_c_invert.batch(512) @@ -164,16 +158,14 @@ def test_invert_py_c(plot=False): # invert images in python ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), C.Resize((224, 224))]) + ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) transforms_p_invert = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8), F.ToPIL(), F.Invert(), np.array]) - ds_p_invert = ds.map(input_columns="image", - operations=transforms_p_invert) + ds_p_invert = ds.map(operations=transforms_p_invert, input_columns="image") ds_p_invert = ds_p_invert.batch(512) @@ -205,13 +197,10 @@ def test_invert_one_channel(): try: ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - ds = ds.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((224, 224)), - lambda img: np.array(img[:, :, 0])]) + ds = ds.map(operations=[C.Decode(), 
C.Resize((224, 224)), + lambda img: np.array(img[:, :, 0])], input_columns=["image"]) - ds.map(input_columns="image", - operations=c_op) + ds.map(operations=c_op, input_columns="image") except RuntimeError as e: logger.info("Got an exception in DE: {}".format(str(e))) @@ -231,7 +220,7 @@ def test_invert_md5_py(): F.Invert(), F.ToTensor()]) - data = ds.map(input_columns="image", operations=transforms_invert) + data = ds.map(operations=transforms_invert, input_columns="image") # Compare with expected md5 from images filename = "invert_01_result_py.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -251,7 +240,7 @@ def test_invert_md5_c(): C.Invert(), F.ToTensor()] - data = ds.map(input_columns="image", operations=transforms_invert) + data = ds.map(operations=transforms_invert, input_columns="image") # Compare with expected md5 from images filename = "invert_01_result_c.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) diff --git a/tests/ut/python/dataset/test_linear_transformation.py b/tests/ut/python/dataset/test_linear_transformation.py index 7a82d5df8a..488cafa686 100644 --- a/tests/ut/python/dataset/test_linear_transformation.py +++ b/tests/ut/python/dataset/test_linear_transformation.py @@ -51,15 +51,15 @@ def test_linear_transformation_op(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transform) + data1 = data1.map(operations=transform, input_columns=["image"]) # Note: if transformation matrix is diagonal matrix with all 1 in diagonal, # the output matrix in expected to be the same as the input matrix. - data1 = data1.map(input_columns=["image"], - operations=py_vision.LinearTransformation(transformation_matrix, mean_vector)) + data1 = data1.map(operations=py_vision.LinearTransformation(transformation_matrix, mean_vector), + input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) image_transformed = [] image = [] @@ -98,7 +98,7 @@ def test_linear_transformation_md5(): py_vision.LinearTransformation(transformation_matrix, mean_vector) ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data1 = data1.map(input_columns=["image"], operations=transform) + data1 = data1.map(operations=transform, input_columns=["image"]) # Compare with expected md5 from images filename = "linear_transformation_01_result.npz" @@ -128,7 +128,7 @@ def test_linear_transformation_exception_01(): py_vision.LinearTransformation(None, mean_vector) ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data1 = data1.map(input_columns=["image"], operations=transform) + data1 = data1.map(operations=transform, input_columns=["image"]) except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Argument transformation_matrix with value None is not of type (<class 'numpy.ndarray'>,)" in str(e) @@ -157,7 +157,7 @@ def test_linear_transformation_exception_02(): py_vision.LinearTransformation(transformation_matrix, None) ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data1 = data1.map(input_columns=["image"], operations=transform) + data1 = data1.map(operations=transform, input_columns=["image"]) except TypeError as e: logger.info("Got an exception in DE:
{}".format(str(e))) assert "Argument mean_vector with value None is not of type (,)" in str(e) @@ -187,7 +187,7 @@ def test_linear_transformation_exception_03(): py_vision.LinearTransformation(transformation_matrix, mean_vector) ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data1 = data1.map(input_columns=["image"], operations=transform) + data1 = data1.map(operations=transform, input_columns=["image"]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "square matrix" in str(e) @@ -217,7 +217,7 @@ def test_linear_transformation_exception_04(): py_vision.LinearTransformation(transformation_matrix, mean_vector) ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data1 = data1.map(input_columns=["image"], operations=transform) + data1 = data1.map(operations=transform, input_columns=["image"]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "should match" in str(e) diff --git a/tests/ut/python/dataset/test_minddataset.py b/tests/ut/python/dataset/test_minddataset.py index db1052adab..1a36bdd766 100644 --- a/tests/ut/python/dataset/test_minddataset.py +++ b/tests/ut/python/dataset/test_minddataset.py @@ -73,6 +73,7 @@ def add_and_remove_cv_file(): os.remove("{}".format(x)) os.remove("{}.db".format(x)) + @pytest.fixture def add_and_remove_nlp_file(): """add/remove nlp file""" @@ -265,6 +266,7 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file): assert partitions(5) == 2 assert partitions(9) == 2 + def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "file_name", "label"] @@ -287,6 +289,7 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): assert partitions(5) == 1 assert partitions(9) == 1 + def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "file_name", "label"] @@ -309,6 +312,7 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): assert partitions(5) == 2 assert partitions(9) == 2 + def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "file_name", "label"] @@ -354,11 +358,11 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) num_iter += 1 if num_iter <= 4: - epoch1.append(item["file_name"]) # save epoch 1 list + epoch1.append(item["file_name"]) # save epoch 1 list elif num_iter <= 8: - epoch2.append(item["file_name"]) # save epoch 2 list + epoch2.append(item["file_name"]) # save epoch 2 list else: - epoch3.append(item["file_name"]) # save epoch 3 list + epoch3.append(item["file_name"]) # save epoch 3 list assert num_iter == 12 assert len(epoch1) == 4 assert len(epoch2) == 4 @@ -376,9 +380,9 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc columns_list = ["data", "file_name", "label"] num_readers = 4 num_shards = 3 - epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result - [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 1 result - [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # svae partition 2 result + epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result + [["", "", "", ""], ["", 
"", "", ""], ["", "", "", ""]], # save partition 1 result + [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # svae partition 2 result for partition_id in range(num_shards): data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, @@ -392,7 +396,7 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) # total 3 partition, 4 result per epoch, total 12 result - epoch_result[partition_id][int(num_iter / 4)][num_iter % 4] = item["file_name"] # save epoch result + epoch_result[partition_id][int(num_iter / 4)][num_iter % 4] = item["file_name"] # save epoch result num_iter += 1 assert num_iter == 12 assert epoch_result[partition_id][0] not in (epoch_result[partition_id][1], epoch_result[partition_id][2]) @@ -425,11 +429,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) num_iter += 1 if num_iter <= 10: - epoch1.append(item["file_name"]) # save epoch 1 list + epoch1.append(item["file_name"]) # save epoch 1 list elif num_iter <= 20: - epoch2.append(item["file_name"]) # save epoch 2 list + epoch2.append(item["file_name"]) # save epoch 2 list else: - epoch3.append(item["file_name"]) # save epoch 3 list + epoch3.append(item["file_name"]) # save epoch 3 list assert num_iter == 30 assert len(epoch1) == 10 assert len(epoch2) == 10 @@ -451,11 +455,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) num_iter += 1 if num_iter <= 10: - epoch1_new_dataset.append(item["file_name"]) # save epoch 1 list + epoch1_new_dataset.append(item["file_name"]) # save epoch 1 list elif num_iter <= 20: - epoch2_new_dataset.append(item["file_name"]) # save epoch 2 list + epoch2_new_dataset.append(item["file_name"]) # save epoch 2 list else: - epoch3_new_dataset.append(item["file_name"]) # save epoch 3 list + epoch3_new_dataset.append(item["file_name"]) # save epoch 3 list assert num_iter == 30 assert len(epoch1_new_dataset) == 10 assert len(epoch2_new_dataset) == 10 @@ -482,11 +486,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) num_iter += 1 if num_iter <= 10: - epoch1_new_dataset2.append(item["file_name"]) # save epoch 1 list + epoch1_new_dataset2.append(item["file_name"]) # save epoch 1 list elif num_iter <= 20: - epoch2_new_dataset2.append(item["file_name"]) # save epoch 2 list + epoch2_new_dataset2.append(item["file_name"]) # save epoch 2 list else: - epoch3_new_dataset2.append(item["file_name"]) # save epoch 3 list + epoch3_new_dataset2.append(item["file_name"]) # save epoch 3 list assert num_iter == 30 assert len(epoch1_new_dataset2) == 10 assert len(epoch2_new_dataset2) == 10 @@ -532,8 +536,8 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file): data_set = data_set.map( input_columns=["data"], operations=decode_op, num_parallel_workers=2) resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR) - data_set = data_set.map(input_columns="data", - operations=resize_op, num_parallel_workers=2) + data_set = data_set.map(operations=resize_op, input_columns="data", + num_parallel_workers=2) data_set = data_set.batch(2) 
data_set = data_set.repeat(2) num_iter = 0 @@ -563,8 +567,8 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file): data_set = data_set.map( input_columns=["data"], operations=decode_op, num_parallel_workers=2) resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR) - data_set = data_set.map(input_columns="data", - operations=resize_op, num_parallel_workers=2) + data_set = data_set.map(operations=resize_op, input_columns="data", + num_parallel_workers=2) data_set = data_set.batch(32, drop_remainder=True) num_iter = 0 for item in data_set.create_dict_iterator(num_epochs=1): @@ -707,6 +711,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): if os.path.exists("{}.db".format(CV2_FILE_NAME)): os.remove("{}.db".format(CV2_FILE_NAME)) + def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0')) for x in range(FILES_NUM)] @@ -757,6 +762,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): os.remove("{}".format(x)) os.remove("{}.db".format(x)) + def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): """tutorial for cv minderdataset.""" columns_list = ["data", "file_name", "label"] @@ -778,6 +784,7 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): num_iter += 1 assert num_iter == 10 + def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file): """tutorial for nlp minderdataset.""" num_readers = 4 @@ -1522,6 +1529,7 @@ def test_write_with_multi_bytes_and_MindDataset(): os.remove("{}".format(mindrecord_file_name)) os.remove("{}.db".format(mindrecord_file_name)) + def test_write_with_multi_array_and_MindDataset(): mindrecord_file_name = "test.mindrecord" try: @@ -1741,9 +1749,9 @@ def test_numpy_generic(): for idx in range(10): row = {} row['label1'] = np.int32(idx) - row['label2'] = np.int64(idx*10) - row['label3'] = np.float32(idx+0.12345) - row['label4'] = np.float64(idx+0.12345789) + row['label2'] = np.int64(idx * 10) + row['label3'] = np.float32(idx + 0.12345) + row['label4'] = np.float64(idx + 0.12345789) data.append(row) writer.add_schema(cv_schema_json, "img_schema") writer.write_raw_data(data) @@ -1923,6 +1931,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset( os.remove("{}".format(mindrecord_file_name)) os.remove("{}.db".format(mindrecord_file_name)) + if __name__ == '__main__': test_nlp_compress_data(add_and_remove_nlp_compress_file) test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file) diff --git a/tests/ut/python/dataset/test_mixup_label_smoothing.py b/tests/ut/python/dataset/test_mixup_label_smoothing.py index 33b076b86a..9786707e6e 100644 --- a/tests/ut/python/dataset/test_mixup_label_smoothing.py +++ b/tests/ut/python/dataset/test_mixup_label_smoothing.py @@ -37,9 +37,9 @@ def test_one_hot_op(): num_classes = 2 epsilon_para = 0.1 - transforms = [f.OneHotOp(num_classes=num_classes, smoothing_rate=epsilon_para),] + transforms = [f.OneHotOp(num_classes=num_classes, smoothing_rate=epsilon_para)] transform_label = f.Compose(transforms) - dataset = dataset.map(input_columns=["label"], operations=transform_label) + dataset = dataset.map(operations=transform_label, input_columns=["label"]) golden_label = np.ones(num_classes) * epsilon_para / num_classes golden_label[1] = 1 - epsilon_para / num_classes @@ -69,9 +69,9 @@ def test_mix_up_single(): resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR) 
one_hot_encode = c.OneHot(num_classes) # num_classes is input argument - ds1 = ds1.map(input_columns=["image"], operations=decode_op) - ds1 = ds1.map(input_columns=["image"], operations=resize_op) - ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode) + ds1 = ds1.map(operations=decode_op, input_columns=["image"]) + ds1 = ds1.map(operations=resize_op, input_columns=["image"]) + ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"]) # apply batch operations batch_size = 3 @@ -81,7 +81,7 @@ def test_mix_up_single(): alpha = 0.2 transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=True) ] - ds1 = ds1.map(input_columns=["image", "label"], operations=transforms) + ds1 = ds1.map(operations=transforms, input_columns=["image", "label"]) for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): image1 = data1["image"] @@ -118,9 +118,9 @@ def test_mix_up_multi(): resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR) one_hot_encode = c.OneHot(num_classes) # num_classes is input argument - ds1 = ds1.map(input_columns=["image"], operations=decode_op) - ds1 = ds1.map(input_columns=["image"], operations=resize_op) - ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode) + ds1 = ds1.map(operations=decode_op, input_columns=["image"]) + ds1 = ds1.map(operations=resize_op, input_columns=["image"]) + ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"]) # apply batch operations batch_size = 3 @@ -130,7 +130,7 @@ def test_mix_up_multi(): alpha = 0.2 transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False) ] - ds1 = ds1.map(input_columns=["image", "label"], operations=transforms) + ds1 = ds1.map(operations=transforms, input_columns=["image", "label"]) num_iter = 0 batch1_image1 = 0 for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): diff --git a/tests/ut/python/dataset/test_mixup_op.py b/tests/ut/python/dataset/test_mixup_op.py index ab7a2f8355..6357dc8f2a 100644 --- a/tests/ut/python/dataset/test_mixup_op.py +++ b/tests/ut/python/dataset/test_mixup_op.py @@ -30,6 +30,7 @@ DATA_DIR3 = "../data/dataset/testCelebAData/" GENERATE_GOLDEN = False + def test_mixup_batch_success1(plot=False): """ Test MixUpBatch op with specified alpha parameter @@ -51,10 +52,10 @@ def test_mixup_batch_success1(plot=False): data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) one_hot_op = data_trans.OneHot(num_classes=10) - data1 = data1.map(input_columns=["label"], operations=one_hot_op) + data1 = data1.map(operations=one_hot_op, input_columns=["label"]) mixup_batch_op = vision.MixUpBatch(2) data1 = data1.batch(5, drop_remainder=True) - data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) + data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) images_mixup = None for idx, (image, _) in enumerate(data1): @@ -81,7 +82,7 @@ def test_mixup_batch_success2(plot=False): # Original Images ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) decode_op = vision.Decode() - ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) + ds_original = ds_original.map(operations=[decode_op], input_columns=["image"]) ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True) images_original = None @@ -95,14 +96,14 @@ def test_mixup_batch_success2(plot=False): data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, 
shuffle=False) decode_op = vision.Decode() - data1 = data1.map(input_columns=["image"], operations=[decode_op]) + data1 = data1.map(operations=[decode_op], input_columns=["image"]) one_hot_op = data_trans.OneHot(num_classes=10) - data1 = data1.map(input_columns=["label"], operations=one_hot_op) + data1 = data1.map(operations=one_hot_op, input_columns=["label"]) mixup_batch_op = vision.MixUpBatch(2.0) data1 = data1.batch(4, pad_info={}, drop_remainder=True) - data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) + data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) images_mixup = None for idx, (image, _) in enumerate(data1): @@ -142,10 +143,10 @@ def test_mixup_batch_success3(plot=False): data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) one_hot_op = data_trans.OneHot(num_classes=10) - data1 = data1.map(input_columns=["label"], operations=one_hot_op) + data1 = data1.map(operations=one_hot_op, input_columns=["label"]) mixup_batch_op = vision.MixUpBatch() data1 = data1.batch(5, drop_remainder=True) - data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) + data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) images_mixup = np.array([]) for idx, (image, _) in enumerate(data1): @@ -173,7 +174,7 @@ def test_mixup_batch_success4(plot=False): # Original Images ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False) decode_op = vision.Decode() - ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) + ds_original = ds_original.map(operations=[decode_op], input_columns=["image"]) ds_original = ds_original.batch(2, drop_remainder=True) images_original = None @@ -187,14 +188,14 @@ def test_mixup_batch_success4(plot=False): data1 = ds.CelebADataset(DATA_DIR3, shuffle=False) decode_op = vision.Decode() - data1 = data1.map(input_columns=["image"], operations=[decode_op]) + data1 = data1.map(operations=[decode_op], input_columns=["image"]) one_hot_op = data_trans.OneHot(num_classes=100) - data1 = data1.map(input_columns=["attr"], operations=one_hot_op) + data1 = data1.map(operations=one_hot_op, input_columns=["attr"]) mixup_batch_op = vision.MixUpBatch() data1 = data1.batch(2, drop_remainder=True) - data1 = data1.map(input_columns=["image", "attr"], operations=mixup_batch_op) + data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "attr"]) images_mixup = np.array([]) for idx, (image, _) in enumerate(data1): @@ -224,10 +225,10 @@ def test_mixup_batch_md5(): data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) one_hot_op = data_trans.OneHot(num_classes=10) - data = data.map(input_columns=["label"], operations=one_hot_op) + data = data.map(operations=one_hot_op, input_columns=["label"]) mixup_batch_op = vision.MixUpBatch() data = data.batch(5, drop_remainder=True) - data = data.map(input_columns=["image", "label"], operations=mixup_batch_op) + data = data.map(operations=mixup_batch_op, input_columns=["image", "label"]) filename = "mixup_batch_c_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -259,10 +260,10 @@ def test_mixup_batch_fail1(): data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) one_hot_op = data_trans.OneHot(num_classes=10) - data1 = data1.map(input_columns=["label"], operations=one_hot_op) + data1 = data1.map(operations=one_hot_op, input_columns=["label"]) mixup_batch_op = vision.MixUpBatch(0.1) with pytest.raises(RuntimeError) as error: - data1 = 
data1.map(input_columns=["image", "label"], operations=mixup_batch_op) + data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) for idx, (image, _) in enumerate(data1): if idx == 0: images_mixup = image @@ -294,7 +295,7 @@ def test_mixup_batch_fail2(): data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) one_hot_op = data_trans.OneHot(num_classes=10) - data1 = data1.map(input_columns=["label"], operations=one_hot_op) + data1 = data1.map(operations=one_hot_op, input_columns=["label"]) with pytest.raises(ValueError) as error: vision.MixUpBatch(-1) error_message = "Input is not within the required interval" @@ -322,10 +323,10 @@ def test_mixup_batch_fail3(): data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) one_hot_op = data_trans.OneHot(num_classes=10) - data1 = data1.map(input_columns=["label"], operations=one_hot_op) + data1 = data1.map(operations=one_hot_op, input_columns=["label"]) mixup_batch_op = vision.MixUpBatch() data1 = data1.batch(5, drop_remainder=True) - data1 = data1.map(input_columns=["image"], operations=mixup_batch_op) + data1 = data1.map(operations=mixup_batch_op, input_columns=["image"]) with pytest.raises(RuntimeError) as error: images_mixup = np.array([]) @@ -337,6 +338,7 @@ def test_mixup_batch_fail3(): error_message = "Both images and labels columns are required" assert error_message in str(error.value) + def test_mixup_batch_fail4(): """ Test MixUpBatch Fail 2 @@ -359,7 +361,7 @@ def test_mixup_batch_fail4(): data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) one_hot_op = data_trans.OneHot(num_classes=10) - data1 = data1.map(input_columns=["label"], operations=one_hot_op) + data1 = data1.map(operations=one_hot_op, input_columns=["label"]) with pytest.raises(ValueError) as error: vision.MixUpBatch(0.0) error_message = "Input is not within the required interval" @@ -389,7 +391,7 @@ def test_mixup_batch_fail5(): mixup_batch_op = vision.MixUpBatch() data1 = data1.batch(5, drop_remainder=True) - data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) + data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) with pytest.raises(RuntimeError) as error: images_mixup = np.array([]) diff --git a/tests/ut/python/dataset/test_ngram_op.py b/tests/ut/python/dataset/test_ngram_op.py index 367bcea9bd..ef65a8e561 100644 --- a/tests/ut/python/dataset/test_ngram_op.py +++ b/tests/ut/python/dataset/test_ngram_op.py @@ -39,7 +39,7 @@ def test_multiple_ngrams(): yield (np.array(line.split(" "), dtype='S'),) dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"]) - dataset = dataset.map(input_columns=["text"], operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " ")) + dataset = dataset.map(operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " "), input_columns="text") i = 0 for data in dataset.create_dict_iterator(num_epochs=1): @@ -61,7 +61,7 @@ def test_simple_ngram(): yield (np.array(line.split(" "), dtype='S'),) dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"]) - dataset = dataset.map(input_columns=["text"], operations=text.Ngram(3, separator=" ")) + dataset = dataset.map(operations=text.Ngram(3, separator=" "), input_columns="text") i = 0 for data in dataset.create_dict_iterator(num_epochs=1): @@ -78,7 +78,7 @@ def test_corner_cases(): try: dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"]) - dataset = dataset.map(input_columns=["text"], operations=text.Ngram(n, l_pad, r_pad, separator=sep)) + dataset = 
dataset.map(operations=text.Ngram(n, l_pad, r_pad, separator=sep), input_columns=["text"]) for data in dataset.create_dict_iterator(num_epochs=1): return [d.decode("utf8") for d in data["text"]] except (ValueError, TypeError) as e: diff --git a/tests/ut/python/dataset/test_nlp.py b/tests/ut/python/dataset/test_nlp.py index cab9c03a4f..6fe2b029c5 100644 --- a/tests/ut/python/dataset/test_nlp.py +++ b/tests/ut/python/dataset/test_nlp.py @@ -32,10 +32,10 @@ def test_on_tokenized_line(): for line in f: word = line.split(',')[0] jieba_op.add_word(word) - data = data.map(input_columns=["text"], operations=jieba_op) + data = data.map(operations=jieba_op, input_columns=["text"]) vocab = text.Vocab.from_file(VOCAB_FILE, ",", special_tokens=["<pad>", "<unk>"]) lookup = text.Lookup(vocab, "<unk>") - data = data.map(input_columns=["text"], operations=lookup) + data = data.map(operations=lookup, input_columns=["text"]) res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14], [11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32) for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): @@ -50,10 +50,10 @@ def test_on_tokenized_line_with_no_special_tokens(): word = line.split(',')[0] jieba_op.add_word(word) - data = data.map(input_columns=["text"], operations=jieba_op) + data = data.map(operations=jieba_op, input_columns=["text"]) vocab = text.Vocab.from_file(VOCAB_FILE, ",") lookup = text.Lookup(vocab, "not") - data = data.map(input_columns=["text"], operations=lookup) + data = data.map(operations=lookup, input_columns=["text"]) res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12], [9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32) for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): diff --git a/tests/ut/python/dataset/test_normalizeOp.py b/tests/ut/python/dataset/test_normalizeOp.py index 9303a86c80..bfd585969d 100644 --- a/tests/ut/python/dataset/test_normalizeOp.py +++ b/tests/ut/python/dataset/test_normalizeOp.py @@ -51,8 +51,8 @@ def util_test_normalize(mean, std, op_type): normalize_op = c_vision.Normalize(mean, std) # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=normalize_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=normalize_op, input_columns=["image"]) elif op_type == "python": # define map operations transforms = [ @@ -63,7 +63,7 @@ def util_test_normalize(mean, std, op_type): transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) else: raise ValueError("Wrong parameter value") return data @@ -82,7 +82,7 @@ def util_test_normalize_grayscale(num_output_channels, mean, std): transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) return data @@ -99,12 +99,12 @@ def test_normalize_op_c(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"],
operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=normalize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=normalize_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=decode_op) + data2 = data2.map(operations=decode_op, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -136,12 +136,12 @@ def test_normalize_op_py(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transform) - data1 = data1.map(input_columns=["image"], operations=normalize_op) + data1 = data1.map(operations=transform, input_columns=["image"]) + data1 = data1.map(operations=normalize_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -169,7 +169,7 @@ def test_decode_op(): decode_op = c_vision.Decode() # apply map operations on images - data1 = data1.map(input_columns=["image"], operations=decode_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) num_iter = 0 for item in data1.create_dict_iterator(num_epochs=1): @@ -192,7 +192,7 @@ def test_decode_normalize_op(): normalize_op = c_vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0]) # apply map operations on images - data1 = data1.map(input_columns=["image"], operations=[decode_op, normalize_op]) + data1 = data1.map(operations=[decode_op, normalize_op], input_columns=["image"]) num_iter = 0 for item in data1.create_dict_iterator(num_epochs=1): diff --git a/tests/ut/python/dataset/test_onehot_op.py b/tests/ut/python/dataset/test_onehot_op.py index 062256f505..29618d9168 100644 --- a/tests/ut/python/dataset/test_onehot_op.py +++ b/tests/ut/python/dataset/test_onehot_op.py @@ -47,13 +47,14 @@ def test_one_hot(): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) one_hot_op = data_trans.OneHot(num_classes=depth) - data1 = data1.map(input_columns=["label"], operations=one_hot_op, column_order=["label"]) + data1 = data1.map(operations=one_hot_op, input_columns=["label"], column_order=["label"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["label"], shuffle=False) assert dataset_equal_with_function(data1, data2, 0, one_hot, depth) + def test_one_hot_post_aug(): """ Test One Hot Encoding after Multiple Data Augmentation Operators @@ -72,14 +73,14 @@ def test_one_hot_post_aug(): resize_op = c_vision.Resize((resize_height, resize_width)) # Apply map operations on images - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=rescale_op) - data1 = data1.map(input_columns=["image"], operations=resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=rescale_op, input_columns=["image"]) + data1 = data1.map(operations=resize_op, input_columns=["image"]) # Apply one-hot encoding on labels depth = 4 one_hot_encode = 
data_trans.OneHot(depth) - data1 = data1.map(input_columns=["label"], operations=one_hot_encode) + data1 = data1.map(operations=one_hot_encode, input_columns=["label"]) # Apply datasets ops buffer_size = 100 diff --git a/tests/ut/python/dataset/test_opt_pass.py b/tests/ut/python/dataset/test_opt_pass.py index 91e06ad3a2..e1a519f680 100644 --- a/tests/ut/python/dataset/test_opt_pass.py +++ b/tests/ut/python/dataset/test_opt_pass.py @@ -16,6 +16,7 @@ import numpy as np import mindspore.dataset as ds + # tests the construction of multiple ops from a single dataset. # map dataset with columns order arguments should produce a ProjectOp over MapOp # This test does not utilize the compiling passes at this time. @@ -27,12 +28,13 @@ def test_map_reorder0(): # Generator -> Map data0 = ds.GeneratorDataset(generator_mc, ["col0", "col1"]) - data0 = data0.map(input_columns="col0", output_columns="out", column_order=["col1", "out"], - operations=(lambda x: x)) + data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out", + column_order=["col1", "out"]) for item in data0.create_tuple_iterator(num_epochs=1): # each data is a dictionary assert item == [np.array(1), np.array(0)] + # tests the construction of multiple ops from a single dataset. # map dataset with columns order arguments should produce a ProjectOp over MapOp # This test does not utilize the compiling passes at this time. @@ -43,20 +45,20 @@ def test_map_reorder1(): # Three map and zip data0 = ds.GeneratorDataset(generator_mc, ["a0", "a1", "a2"]) - data0 = data0.map(input_columns="a0", column_order=["a2", "a1", "a0"], operations=(lambda x: x)) + data0 = data0.map(operations=(lambda x: x), input_columns="a0", column_order=["a2", "a1", "a0"]) data1 = ds.GeneratorDataset(generator_mc, ["b0", "b1", "b2"]) - data1 = data1.map(input_columns="b0", column_order=["b1", "b2", "b0"], operations=(lambda x: x)) + data1 = data1.map(operations=(lambda x: x), input_columns="b0", column_order=["b1", "b2", "b0"]) data2 = ds.zip((data0, data1)) - data2 = data2.map(input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"], operations=(lambda x: x)) + data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"]) for item in data2.create_tuple_iterator(num_epochs=1): assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)] + # tests the construction of multiple ops from a single dataset. # TFRecordDataset with global shuffle should produce a ShuffleOp over TfReaderOp. # This test does not utilize the compiling passes at this time. 
def test_shuffle(): - FILES = ["../data/dataset/testTFTestAllTypes/test.data"] SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json" diff --git a/tests/ut/python/dataset/test_pad.py b/tests/ut/python/dataset/test_pad.py index 3814b7e979..803ea1e8d0 100644 --- a/tests/ut/python/dataset/test_pad.py +++ b/tests/ut/python/dataset/test_pad.py @@ -44,7 +44,7 @@ def test_pad_op(): pad_op, ] - data1 = data1.map(input_columns=["image"], operations=ctrans) + data1 = data1.map(operations=ctrans, input_columns=["image"]) # Second dataset transforms = [ @@ -54,7 +54,7 @@ def test_pad_op(): ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): c_image = item1["image"] @@ -88,11 +88,11 @@ def test_pad_grayscale(): transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transform) + data1 = data1.map(operations=transform, input_columns=["image"]) # if input is grayscale, the output dimensions should be single channel pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20)) - data1 = data1.map(input_columns=["image"], operations=pad_gray) + data1 = data1.map(operations=pad_gray, input_columns=["image"]) dataset_shape_1 = [] for item1 in data1.create_dict_iterator(num_epochs=1): c_image = item1["image"] @@ -106,7 +106,7 @@ def test_pad_grayscale(): ctrans = [decode_op, pad_gray] dataset_shape_2 = [] - data2 = data2.map(input_columns=["image"], operations=ctrans) + data2 = data2.map(operations=ctrans, input_columns=["image"]) for item2 in data2.create_dict_iterator(num_epochs=1): c_image = item2["image"] @@ -132,7 +132,7 @@ def test_pad_md5(): pad_op, ] - data1 = data1.map(input_columns=["image"], operations=ctrans) + data1 = data1.map(operations=ctrans, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -142,7 +142,7 @@ def test_pad_md5(): py_vision.ToTensor(), ] transform = mindspore.dataset.transforms.py_transforms.Compose(pytrans) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) # Compare with expected md5 from images filename1 = "pad_01_c_result.npz" save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN) diff --git a/tests/ut/python/dataset/test_pad_batch.py b/tests/ut/python/dataset/test_pad_batch.py index aae340e8c4..e63c4a9ab7 100644 --- a/tests/ut/python/dataset/test_pad_batch.py +++ b/tests/ut/python/dataset/test_pad_batch.py @@ -127,7 +127,7 @@ def batch_padding_performance_1d(): cifar10_dir = "../data/dataset/testCifar10Data" data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3] data1 = data1.repeat(24) - data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1))) + data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image") pad_info = {"image": ([3888], 0)} # 3888 =36*36*3 # pad_info = None data1 = data1.batch(batch_size=24, drop_remainder=True, pad_info=pad_info) @@ -144,7 +144,7 @@ def batch_pyfunc_padding_3d(): data1 = 
ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3] data1 = data1.repeat(24) # pad_info = {"image": ([36, 36, 3], 0)} - data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, ((0, 4), (0, 4), (0, 0)))), + data1 = data1.map(operations=(lambda x: np.pad(x, ((0, 4), (0, 4), (0, 0)))), input_columns="image", python_multiprocessing=False) data1 = data1.batch(batch_size=24, drop_remainder=True) start_time = time.time() @@ -159,8 +159,8 @@ def batch_pyfunc_padding_1d(): cifar10_dir = "../data/dataset/testCifar10Data" data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3] data1 = data1.repeat(24) - data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1))) - data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816))), python_multiprocessing=False) + data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image") + data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image", python_multiprocessing=False) data1 = data1.batch(batch_size=24, drop_remainder=True) start_time = time.time() num_batches = 0 @@ -176,8 +176,8 @@ def test_pad_via_map(): def pad_map_config(): data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000) # shape = [32,32,3] - data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1))) # reshape to 1d - data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816)))) + data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d + data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image") data1 = data1.batch(batch_size=25, drop_remainder=True) res = [] for data in data1.create_dict_iterator(num_epochs=1): @@ -186,7 +186,7 @@ def test_pad_via_map(): def pad_batch_config(): data2 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000) # shape = [32,32,3] - data2 = data2.map(input_columns="image", operations=(lambda x: x.reshape(-1))) # reshape to 1d + data2 = data2.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)}) res = [] for data in data2.create_dict_iterator(num_epochs=1): diff --git a/tests/ut/python/dataset/test_paddeddataset.py b/tests/ut/python/dataset/test_paddeddataset.py index 370690543e..4f3e7da187 100644 --- a/tests/ut/python/dataset/test_paddeddataset.py +++ b/tests/ut/python/dataset/test_paddeddataset.py @@ -12,22 +12,27 @@ FILES_NUM = 4 CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord" CV_DIR_NAME = "../data/mindrecord/testImageNetData" + def generator_5(): for i in range(0, 5): yield (np.array([i]),) + def generator_8(): for i in range(5, 8): yield (np.array([i]),) + def generator_10(): for i in range(0, 10): yield (np.array([i]),) + def generator_20(): for i in range(10, 20): yield (np.array([i]),) + def generator_30(): for i in range(20, 30): yield (np.array([i]),) @@ -57,12 +62,13 @@ def test_TFRecord_Padded(): verify_list.append(shard_list) assert verify_list == result_list + def test_GeneratorDataSet_Padded(): result_list = [] for i in range(10): tem_list = [] tem_list.append(i) - tem_list.append(10+i) + tem_list.append(10 + i) result_list.append(tem_list) verify_list = [] @@ -80,6 +86,7 @@ def test_GeneratorDataSet_Padded(): assert verify_list == result_list + def test_Reapeat_afterPadded(): result_list = [1, 3, 5, 7] verify_list = [] @@ -103,6 +110,7 @@ def test_Reapeat_afterPadded(): 
assert verify_list == result_list * repeat_num + def test_bath_afterPadded(): data1 = [{'image': np.zeros(1, np.uint8)}, {'image': np.zeros(1, np.uint8)}, {'image': np.zeros(1, np.uint8)}, {'image': np.zeros(1, np.uint8)}, @@ -120,6 +128,7 @@ def test_bath_afterPadded(): ds4 = ds3.batch(2) assert sum([1 for _ in ds4]) == 2 + def test_Unevenly_distributed(): result_list = [[1, 4, 7], [2, 5, 8], [3, 6]] verify_list = [] @@ -145,6 +154,7 @@ def test_Unevenly_distributed(): verify_list.append(tem_list) assert verify_list == result_list + def test_three_datasets_connected(): result_list = [] for i in range(10): @@ -170,6 +180,7 @@ def test_three_datasets_connected(): assert verify_list == result_list + def test_raise_error(): data1 = [{'image': np.zeros(1, np.uint8)}, {'image': np.zeros(2, np.uint8)}, {'image': np.zeros(3, np.uint8)}, {'image': np.zeros(4, np.uint8)}, @@ -202,6 +213,7 @@ def test_raise_error(): ds3.use_sampler(testsampler) assert excinfo.type == 'ValueError' + def test_imagefolder_padded(): DATA_DIR = "../data/dataset/testPK/data" data = ds.ImageFolderDataset(DATA_DIR) @@ -225,6 +237,7 @@ def test_imagefolder_padded(): assert verify_list[8] == 1 assert verify_list[9] == 6 + def test_imagefolder_padded_with_decode(): num_shards = 5 count = 0 @@ -244,7 +257,7 @@ def test_imagefolder_padded_with_decode(): testsampler = ds.DistributedSampler(num_shards=num_shards, shard_id=shard_id, shuffle=False, num_samples=None) data3.use_sampler(testsampler) - data3 = data3.map(input_columns="image", operations=V_C.Decode()) + data3 = data3.map(operations=V_C.Decode(), input_columns="image") shard_sample_count = 0 for ele in data3.create_dict_iterator(num_epochs=1): print("label: {}".format(ele['label'])) @@ -253,6 +266,7 @@ def test_imagefolder_padded_with_decode(): assert shard_sample_count in (9, 10) assert count == 48 + def test_imagefolder_padded_with_decode_and_get_dataset_size(): num_shards = 5 count = 0 @@ -273,7 +287,7 @@ def test_imagefolder_padded_with_decode_and_get_dataset_size(): testsampler = ds.DistributedSampler(num_shards=num_shards, shard_id=shard_id, shuffle=False, num_samples=None) data3.use_sampler(testsampler) shard_dataset_size = data3.get_dataset_size() - data3 = data3.map(input_columns="image", operations=V_C.Decode()) + data3 = data3.map(operations=V_C.Decode(), input_columns="image") shard_sample_count = 0 for ele in data3.create_dict_iterator(num_epochs=1): print("label: {}".format(ele['label'])) @@ -283,6 +297,7 @@ def test_imagefolder_padded_with_decode_and_get_dataset_size(): assert shard_dataset_size == shard_sample_count assert count == 48 + def test_more_shard_padded(): result_list = [] for i in range(8): @@ -307,7 +322,7 @@ def test_more_shard_padded(): vertifyList1 = [] result_list1 = [] for i in range(8): - result_list1.append([i+1]) + result_list1.append([i + 1]) result_list1.append([]) data1 = [{'image': np.zeros(1, np.uint8)}, {'image': np.zeros(2, np.uint8)}, @@ -330,6 +345,7 @@ def test_more_shard_padded(): assert vertifyList1 == result_list1 + def get_data(dir_name): """ usage: get data from imagenet dataset @@ -360,6 +376,7 @@ def get_data(dir_name): continue return data_list + @pytest.fixture(name="remove_mindrecord_file") def add_and_remove_cv_file(): """add/remove cv file""" @@ -392,6 +409,7 @@ def add_and_remove_cv_file(): os.remove("{}".format(x)) os.remove("{}.db".format(x)) + def test_Mindrecord_Padded(remove_mindrecord_file): result_list = [] verify_list = [[1, 2], [3, 4], [5, 11], [6, 12], [7, 13], [8, 14], [9], [10]] @@ -413,6 +431,7 @@ def 
test_Mindrecord_Padded(remove_mindrecord_file): result_list.append(tem_list) assert result_list == verify_list + def test_clue_padded_and_skip_with_0_samples(): """ Test num_samples param of CLUE dataset @@ -441,7 +460,7 @@ def test_clue_padded_and_skip_with_0_samples(): count += 1 assert count == 2 - dataset = dataset.skip(count=2) # dataset2 has none samples + dataset = dataset.skip(count=2) # dataset2 has none samples count = 0 for data in dataset.create_dict_iterator(num_epochs=1): count += 1 @@ -454,6 +473,7 @@ def test_clue_padded_and_skip_with_0_samples(): count += 1 assert count == 2 + def test_celeba_padded(): data = ds.CelebADataset("../data/dataset/testCelebAData/") @@ -469,6 +489,7 @@ def test_celeba_padded(): count = count + 1 assert count == 2 + if __name__ == '__main__': test_TFRecord_Padded() test_GeneratorDataSet_Padded() diff --git a/tests/ut/python/dataset/test_pair_truncate.py b/tests/ut/python/dataset/test_pair_truncate.py index 784879f4ae..c0733b997b 100644 --- a/tests/ut/python/dataset/test_pair_truncate.py +++ b/tests/ut/python/dataset/test_pair_truncate.py @@ -23,7 +23,7 @@ import mindspore.dataset.text as text def compare(in1, in2, length, out1, out2): data = ds.NumpySlicesDataset({"s1": [in1], "s2": [in2]}) - data = data.map(input_columns=["s1", "s2"], operations=text.TruncateSequencePair(length)) + data = data.map(operations=text.TruncateSequencePair(length), input_columns=["s1", "s2"]) for d in data.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(out1, d["s1"]) np.testing.assert_array_equal(out2, d["s2"]) diff --git a/tests/ut/python/dataset/test_project.py b/tests/ut/python/dataset/test_project.py index 1821165798..599dd27775 100644 --- a/tests/ut/python/dataset/test_project.py +++ b/tests/ut/python/dataset/test_project.py @@ -64,7 +64,7 @@ def test_case_project_map(): data1 = data1.project(columns=columns) type_cast_op = C.TypeCast(mstype.int64) - data1 = data1.map(input_columns=["col_3d"], operations=type_cast_op) + data1 = data1.map(operations=type_cast_op, input_columns=["col_3d"]) filename = "project_map_after_result.npz" save_and_check_tuple(data1, parameters, filename, generate_golden=GENERATE_GOLDEN) @@ -77,7 +77,7 @@ def test_case_map_project(): data1 = ds.TFRecordDataset(DATA_DIR_TF, SCHEMA_DIR_TF, shuffle=False) type_cast_op = C.TypeCast(mstype.int64) - data1 = data1.map(input_columns=["col_sint64"], operations=type_cast_op) + data1 = data1.map(operations=type_cast_op, input_columns=["col_sint64"]) data1 = data1.project(columns=columns) @@ -92,18 +92,18 @@ def test_case_project_between_maps(): data1 = ds.TFRecordDataset(DATA_DIR_TF, SCHEMA_DIR_TF, shuffle=False) type_cast_op = C.TypeCast(mstype.int64) - data1 = data1.map(input_columns=["col_3d"], operations=type_cast_op) - data1 = data1.map(input_columns=["col_3d"], operations=type_cast_op) - data1 = data1.map(input_columns=["col_3d"], operations=type_cast_op) - data1 = data1.map(input_columns=["col_3d"], operations=type_cast_op) + data1 = data1.map(operations=type_cast_op, input_columns=["col_3d"]) + data1 = data1.map(operations=type_cast_op, input_columns=["col_3d"]) + data1 = data1.map(operations=type_cast_op, input_columns=["col_3d"]) + data1 = data1.map(operations=type_cast_op, input_columns=["col_3d"]) data1 = data1.project(columns=columns) - data1 = data1.map(input_columns=["col_3d"], operations=type_cast_op) - data1 = data1.map(input_columns=["col_3d"], operations=type_cast_op) - data1 = data1.map(input_columns=["col_3d"], operations=type_cast_op) - data1 = 
data1.map(input_columns=["col_3d"], operations=type_cast_op) - data1 = data1.map(input_columns=["col_3d"], operations=type_cast_op) + data1 = data1.map(operations=type_cast_op, input_columns=["col_3d"]) + data1 = data1.map(operations=type_cast_op, input_columns=["col_3d"]) + data1 = data1.map(operations=type_cast_op, input_columns=["col_3d"]) + data1 = data1.map(operations=type_cast_op, input_columns=["col_3d"]) + data1 = data1.map(operations=type_cast_op, input_columns=["col_3d"]) filename = "project_between_maps_result.npz" save_and_check_tuple(data1, parameters, filename, generate_golden=GENERATE_GOLDEN) @@ -145,11 +145,11 @@ def test_case_map_project_map_project(): data1 = ds.TFRecordDataset(DATA_DIR_TF, SCHEMA_DIR_TF, shuffle=False) type_cast_op = C.TypeCast(mstype.int64) - data1 = data1.map(input_columns=["col_sint64"], operations=type_cast_op) + data1 = data1.map(operations=type_cast_op, input_columns=["col_sint64"]) data1 = data1.project(columns=columns) - data1 = data1.map(input_columns=["col_2d"], operations=type_cast_op) + data1 = data1.map(operations=type_cast_op, input_columns=["col_2d"]) data1 = data1.project(columns=columns) diff --git a/tests/ut/python/dataset/test_pyfunc.py b/tests/ut/python/dataset/test_pyfunc.py index 5924d9ce12..d7eb447a4c 100644 --- a/tests/ut/python/dataset/test_pyfunc.py +++ b/tests/ut/python/dataset/test_pyfunc.py @@ -33,7 +33,7 @@ def test_case_0(): # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns="col0", output_columns="out", operations=(lambda x: x + x)) + data1 = data1.map(operations=(lambda x: x + x), input_columns="col0", output_columns="out") i = 0 for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary @@ -53,7 +53,7 @@ def test_case_1(): # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns=col, output_columns=["out0", "out1"], operations=(lambda x: (x, x + x)), + data1 = data1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"], column_order=["out0", "out1"]) i = 0 @@ -77,7 +77,7 @@ def test_case_2(): # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns=col, output_columns="out", operations=(lambda x, y: x + y), + data1 = data1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out", column_order=["out"]) i = 0 @@ -99,8 +99,8 @@ def test_case_3(): # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns=col, output_columns=["out0", "out1", "out2"], - operations=(lambda x, y: (x, x + y, x + y + 1)), column_order=["out0", "out1", "out2"]) + data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col, + output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"]) i = 0 for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary @@ -125,8 +125,9 @@ def test_case_4(): # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns=col, output_columns=["out0", "out1", "out2"], num_parallel_workers=4, - operations=(lambda x, y: (x, x + y, x + y + 1)), column_order=["out0", "out1", "out2"]) + data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col, + output_columns=["out0", "out1", "out2"], num_parallel_workers=4, + 
column_order=["out0", "out1", "out2"]) i = 0 for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary @@ -154,7 +155,7 @@ def test_case_5(): # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns="col0", output_columns="out", operations=func_5) + data1 = data1.map(operations=func_5, input_columns="col0", output_columns="out") for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors @@ -171,8 +172,7 @@ def test_case_6(): # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns="col0", output_columns="out", - operations=[(lambda x: x + x), (lambda x: x + x)]) + data1 = data1.map(operations=[(lambda x: x + x), (lambda x: x + x)], input_columns="col0", output_columns="out") i = 0 for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary @@ -191,7 +191,7 @@ def test_case_7(): # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns="col0", output_columns="out", operations=(lambda x: x + x), + data1 = data1.map(operations=(lambda x: x + x), input_columns="col0", output_columns="out", num_parallel_workers=4, python_multiprocessing=True) i = 0 @@ -213,8 +213,9 @@ def test_case_8(): # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns=col, output_columns=["out0", "out1", "out2"], num_parallel_workers=4, - operations=(lambda x, y: (x, x + y, x + y + 1)), column_order=["out0", "out1", "out2"], + data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col, + output_columns=["out0", "out1", "out2"], num_parallel_workers=4, + column_order=["out0", "out1", "out2"], python_multiprocessing=True) i = 0 @@ -238,9 +239,8 @@ def test_case_9(): # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns="col0", output_columns="out", operations=[(lambda x: x + x), (lambda x: x + 1), - (lambda x: x + 2)], - num_parallel_workers=4, python_multiprocessing=True) + data1 = data1.map(operations=[(lambda x: x + x), (lambda x: x + 1), (lambda x: x + 2)], input_columns="col0", + output_columns="out", num_parallel_workers=4, python_multiprocessing=True) i = 0 for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary @@ -259,7 +259,7 @@ def test_pyfunc_execption(): with pytest.raises(RuntimeError) as info: # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns="col0", output_columns="out", operations=pyfunc, + data1 = data1.map(operations=pyfunc, input_columns="col0", output_columns="out", num_parallel_workers=4) for _ in data1: pass @@ -275,7 +275,7 @@ def skip_test_pyfunc_execption_multiprocess(): with pytest.raises(RuntimeError) as info: # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.map(input_columns="col0", output_columns="out", operations=pyfunc, + data1 = data1.map(operations=pyfunc, input_columns="col0", output_columns="out", num_parallel_workers=4, python_multiprocessing=True) for _ in data1: pass diff --git a/tests/ut/python/dataset/test_random_affine.py b/tests/ut/python/dataset/test_random_affine.py index 60cf7d0889..407bec759d 100644 --- 
a/tests/ut/python/dataset/test_random_affine.py +++ b/tests/ut/python/dataset/test_random_affine.py @@ -52,10 +52,10 @@ def test_random_affine_op(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) image_affine = [] image_original = [] @@ -85,10 +85,10 @@ def test_random_affine_op_c(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transforms1) + data1 = data1.map(operations=transforms1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transforms2) + data2 = data2.map(operations=transforms2, input_columns=["image"]) image_affine = [] image_original = [] @@ -119,7 +119,7 @@ def test_random_affine_md5(): # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) # check results with md5 comparison filename = "random_affine_01_result.npz" @@ -146,7 +146,7 @@ def test_random_affine_c_md5(): # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data = data.map(input_columns=["image"], operations=transforms) + data = data.map(operations=transforms, input_columns=["image"]) # check results with md5 comparison filename = "random_affine_01_c_result.npz" @@ -172,7 +172,7 @@ def test_random_affine_default_c_md5(): # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data = data.map(input_columns=["image"], operations=transforms) + data = data.map(operations=transforms, input_columns=["image"]) # check results with md5 comparison filename = "random_affine_01_default_c_result.npz" @@ -192,7 +192,7 @@ def test_random_affine_py_exception_non_pil_images(): try: transform = mindspore.dataset.transforms.py_transforms.Compose([py_vision.ToTensor(), py_vision.RandomAffine(degrees=(15, 15))]) - dataset = dataset.map(input_columns=["image"], operations=transform, num_parallel_workers=3, + dataset = dataset.map(operations=transform, input_columns=["image"], num_parallel_workers=3, python_multiprocessing=True) for _ in dataset.create_dict_iterator(num_epochs=1): break diff --git a/tests/ut/python/dataset/test_random_apply.py b/tests/ut/python/dataset/test_random_apply.py index 5fca4cd7ed..44304be85d 100644 --- a/tests/ut/python/dataset/test_random_apply.py +++ b/tests/ut/python/dataset/test_random_apply.py @@ -51,10 +51,10 @@ def test_random_apply_op(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = 
data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) image_apply = [] image_original = [] @@ -86,7 +86,7 @@ def test_random_apply_md5(): # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) # check results with md5 comparison filename = "random_apply_01_result.npz" @@ -117,7 +117,7 @@ def test_random_apply_exception_random_crop_badinput(): transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) try: _ = data.create_dict_iterator(num_epochs=1).get_next() except RuntimeError as e: diff --git a/tests/ut/python/dataset/test_random_choice.py b/tests/ut/python/dataset/test_random_choice.py index be7d06dff6..729f1bb92d 100644 --- a/tests/ut/python/dataset/test_random_choice.py +++ b/tests/ut/python/dataset/test_random_choice.py @@ -48,10 +48,10 @@ def test_random_choice_op(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) image_choice = [] image_original = [] @@ -87,10 +87,10 @@ def test_random_choice_comp(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) image_choice = [] image_original = [] @@ -123,7 +123,7 @@ def test_random_choice_exception_random_crop_badinput(): transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) try: _ = data.create_dict_iterator(num_epochs=1).get_next() except RuntimeError as e: diff --git a/tests/ut/python/dataset/test_random_color.py b/tests/ut/python/dataset/test_random_color.py index f6e08c34c5..c3701742a0 100644 --- a/tests/ut/python/dataset/test_random_color.py +++ b/tests/ut/python/dataset/test_random_color.py @@ -50,8 +50,7 @@ def test_random_color_py(degrees=(0.1, 1.9), plot=False): F.Resize((224, 224)), F.ToTensor()]) - ds_original = data.map(input_columns="image", - operations=transforms_original) + ds_original = data.map(operations=transforms_original, input_columns="image") ds_original = ds_original.batch(512) @@ -71,8 +70,7 @@ def test_random_color_py(degrees=(0.1, 1.9), plot=False): F.RandomColor(degrees=degrees), F.ToTensor()]) - 
ds_random_color = data.map(input_columns="image", - operations=transforms_random_color) + ds_random_color = data.map(operations=transforms_random_color, input_columns="image") ds_random_color = ds_random_color.batch(512) @@ -113,8 +111,8 @@ def test_random_color_c(degrees=(0.1, 1.9), plot=False, run_golden=True): else: c_op = vision.RandomColor(degrees) - data1 = data1.map(input_columns=["image"], operations=[vision.Decode()]) - data2 = data2.map(input_columns=["image"], operations=[vision.Decode(), c_op]) + data1 = data1.map(operations=[vision.Decode()], input_columns=["image"]) + data2 = data2.map(operations=[vision.Decode(), c_op], input_columns=["image"]) image_random_color_op = [] image = [] @@ -153,7 +151,7 @@ def test_random_color_py_md5(): F.RandomColor((2.0, 2.5)), F.ToTensor()]) - data = data.map(input_columns="image", operations=transforms) + data = data.map(operations=transforms, input_columns="image") # Compare with expected md5 from images filename = "random_color_01_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -184,12 +182,13 @@ def test_compare_random_color_op(degrees=None, plot=False): c_op = vision.RandomColor(degrees) p_op = F.RandomColor(degrees) - transforms_random_color_py = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8), F.ToPIL(), - p_op, np.array]) + transforms_random_color_py = mindspore.dataset.transforms.py_transforms.Compose( + [lambda img: img.astype(np.uint8), F.ToPIL(), + p_op, np.array]) - data1 = data1.map(input_columns=["image"], operations=[vision.Decode(), c_op]) - data2 = data2.map(input_columns=["image"], operations=[vision.Decode()]) - data2 = data2.map(input_columns=["image"], operations=transforms_random_color_py) + data1 = data1.map(operations=[vision.Decode(), c_op], input_columns=["image"]) + data2 = data2.map(operations=[vision.Decode()], input_columns=["image"]) + data2 = data2.map(operations=transforms_random_color_py, input_columns=["image"]) image_random_color_op = [] image = [] @@ -234,7 +233,7 @@ def test_random_color_c_errors(): # RandomColor Cpp Op will fail with one channel input mnist_ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) - mnist_ds = mnist_ds.map(input_columns="image", operations=vision.RandomColor()) + mnist_ds = mnist_ds.map(operations=vision.RandomColor(), input_columns="image") with pytest.raises(RuntimeError) as error_info: for _ in enumerate(mnist_ds): diff --git a/tests/ut/python/dataset/test_random_color_adjust.py b/tests/ut/python/dataset/test_random_color_adjust.py index 6298bfdec7..c8c9f76f6b 100644 --- a/tests/ut/python/dataset/test_random_color_adjust.py +++ b/tests/ut/python/dataset/test_random_color_adjust.py @@ -31,6 +31,7 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" GENERATE_GOLDEN = False + def util_test_random_color_adjust_error(brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)): """ Util function that tests the error message in case of grayscale images @@ -45,13 +46,13 @@ def util_test_random_color_adjust_error(brightness=(1, 1), contrast=(1, 1), satu transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transform) + data1 = data1.map(operations=transform, input_columns=["image"]) # if input is grayscale, the output dimensions should be single channel, the following should fail 
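For reference, the failure the comment above anticipates follows the same pattern used elsewhere in this patch: RandomColorAdjust needs three-channel input, so mapping it over single-channel data raises at iteration time. A minimal sketch of that expectation under the reordered map() signature; the MNIST path is a placeholder, and using MnistDataset here is an illustration, not part of this patch:

    import pytest
    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as c_vision

    # MNIST images are single-channel, so a 3-channel color op should fail.
    mnist_ds = ds.MnistDataset(dataset_dir="/path/to/mnist", num_samples=2, shuffle=False)
    # Post-patch argument order: operations first, column arguments after.
    mnist_ds = mnist_ds.map(operations=c_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1),
                            input_columns=["image"])

    with pytest.raises(RuntimeError):
        for _ in mnist_ds.create_dict_iterator(num_epochs=1):
            pass
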
random_adjust_op = c_vision.RandomColorAdjust(brightness=brightness, contrast=contrast, saturation=saturation, hue=hue) with pytest.raises(RuntimeError) as info: - data1 = data1.map(input_columns=["image"], operations=random_adjust_op) + data1 = data1.map(operations=random_adjust_op, input_columns=["image"]) dataset_shape_1 = [] for item1 in data1.create_dict_iterator(num_epochs=1): c_image = item1["image"] @@ -78,7 +79,7 @@ def util_test_random_color_adjust_op(brightness=(1, 1), contrast=(1, 1), saturat random_adjust_op, ] - data1 = data1.map(input_columns=["image"], operations=ctrans) + data1 = data1.map(operations=ctrans, input_columns=["image"]) # Second dataset transforms = [ @@ -89,7 +90,7 @@ def util_test_random_color_adjust_op(brightness=(1, 1), contrast=(1, 1), saturat ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -203,8 +204,8 @@ def test_random_color_adjust_md5(): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() random_adjust_op = c_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_adjust_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_adjust_op, input_columns=["image"]) # Second dataset transforms = [ @@ -214,7 +215,7 @@ def test_random_color_adjust_md5(): ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) # Compare with expected md5 from images filename = "random_color_adjust_01_c_result.npz" save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN) diff --git a/tests/ut/python/dataset/test_random_crop.py b/tests/ut/python/dataset/test_random_crop.py index c1e0d67e64..f1bddace30 100644 --- a/tests/ut/python/dataset/test_random_crop.py +++ b/tests/ut/python/dataset/test_random_crop.py @@ -44,12 +44,12 @@ def test_random_crop_op_c(plot=False): random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200]) decode_op = c_vision.Decode() - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_crop_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_crop_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=decode_op) + data2 = data2.map(operations=decode_op, input_columns=["image"]) image_cropped = [] image = [] @@ -74,7 +74,7 @@ def test_random_crop_op_py(plot=False): py_vision.ToTensor() ] transform1 = mindspore.dataset.transforms.py_transforms.Compose(transforms1) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset # Second dataset for 
comparison data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -83,7 +83,7 @@ def test_random_crop_op_py(plot=False): py_vision.ToTensor() ] transform2 = mindspore.dataset.transforms.py_transforms.Compose(transforms2) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) crop_images = [] original_images = [] @@ -108,8 +108,8 @@ def test_random_crop_01_c(): # Note: If size is an int, a square crop of size (size, size) is returned. random_crop_op = c_vision.RandomCrop(512) decode_op = c_vision.Decode() - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_crop_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_crop_op, input_columns=["image"]) filename = "random_crop_01_c_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -135,7 +135,7 @@ def test_random_crop_01_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) filename = "random_crop_01_py_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -157,8 +157,8 @@ def test_random_crop_02_c(): # Note: If size is a sequence of length 2, it should be (height, width). random_crop_op = c_vision.RandomCrop([512, 375]) decode_op = c_vision.Decode() - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_crop_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_crop_op, input_columns=["image"]) filename = "random_crop_02_c_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -184,7 +184,7 @@ def test_random_crop_02_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) filename = "random_crop_02_py_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -206,8 +206,8 @@ def test_random_crop_03_c(): # Note: The size of the image is 4032*2268 random_crop_op = c_vision.RandomCrop([2268, 4032]) decode_op = c_vision.Decode() - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_crop_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_crop_op, input_columns=["image"]) filename = "random_crop_03_c_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -233,7 +233,7 @@ def test_random_crop_03_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) filename = "random_crop_03_py_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -253,8 +253,8 @@ def test_random_crop_04_c(): # Note: The size of the image is 4032*2268 random_crop_op = c_vision.RandomCrop([2268, 4033]) decode_op = c_vision.Decode() - data = data.map(input_columns=["image"], operations=decode_op) - data = 
data.map(input_columns=["image"], operations=random_crop_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_crop_op, input_columns=["image"]) try: data.create_dict_iterator(num_epochs=1).get_next() except RuntimeError as e: @@ -277,7 +277,7 @@ def test_random_crop_04_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) try: data.create_dict_iterator(num_epochs=1).get_next() except RuntimeError as e: @@ -299,8 +299,8 @@ def test_random_crop_05_c(): # Note: The size of the image is 4032*2268 random_crop_op = c_vision.RandomCrop([2268, 4033], [200, 200, 200, 200], pad_if_needed=True) decode_op = c_vision.Decode() - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_crop_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_crop_op, input_columns=["image"]) filename = "random_crop_05_c_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -328,7 +328,7 @@ def test_random_crop_05_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) filename = "random_crop_05_py_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -350,8 +350,8 @@ def test_random_crop_06_c(): # Note: if size is neither an int nor a list of length 2, an exception will raise random_crop_op = c_vision.RandomCrop([512, 512, 375]) decode_op = c_vision.Decode() - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_crop_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_crop_op, input_columns=["image"]) except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Size should be a single integer" in str(e) @@ -373,7 +373,7 @@ def test_random_crop_06_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Size should be a single integer" in str(e) @@ -393,8 +393,8 @@ def test_random_crop_07_c(): # Note: The padding_mode is default as Border.CONSTANT and set filling color to be white. 
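A usage note on the RandomCrop call that follows: the second argument pads each side before cropping, and with the default Border.CONSTANT padding mode the fill_value triple sets the pad color, white here. A minimal sketch under the reordered map() signature; the TFRecord paths are placeholders, not part of this patch:

    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as c_vision

    data = ds.TFRecordDataset(["/path/to/train.data"], "/path/to/datasetSchema.json",
                              columns_list=["image"], shuffle=False)

    # Pad 200 px of white on every side, then take a 512x512 crop.
    crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200], fill_value=(255, 255, 255))
    data = data.map(operations=c_vision.Decode(), input_columns=["image"])
    data = data.map(operations=crop_op, input_columns=["image"])
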
random_crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200], fill_value=(255, 255, 255)) decode_op = c_vision.Decode() - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_crop_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_crop_op, input_columns=["image"]) filename = "random_crop_07_c_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -422,7 +422,7 @@ def test_random_crop_07_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) filename = "random_crop_07_py_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -445,8 +445,8 @@ def test_random_crop_08_c(): # Note: The padding_mode is Border.EDGE. random_crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200], padding_mode=mode.Border.EDGE) decode_op = c_vision.Decode() - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_crop_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_crop_op, input_columns=["image"]) filename = "random_crop_08_c_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -473,7 +473,7 @@ def test_random_crop_08_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) filename = "random_crop_08_py_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -497,7 +497,7 @@ def test_random_crop_09(): py_vision.RandomCrop(512) ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) try: data.create_dict_iterator(num_epochs=1).get_next() except RuntimeError as e: @@ -515,8 +515,8 @@ def test_random_crop_comp(plot=False): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) random_crop_op = c_vision.RandomCrop(cropped_size) decode_op = c_vision.Decode() - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_crop_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_crop_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -526,7 +526,7 @@ def test_random_crop_comp(plot=False): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) image_c_cropped = [] image_py_cropped = [] diff --git a/tests/ut/python/dataset/test_random_crop_and_resize.py b/tests/ut/python/dataset/test_random_crop_and_resize.py index f701a3bd91..d501af02b3 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize.py @@ -44,12 +44,12 @@ def test_random_crop_and_resize_op_c(plot=False): decode_op = 
c_vision.Decode() # With these inputs we expect the code to crop the whole image random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (2, 2), (1, 3)) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_crop_and_resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_crop_and_resize_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=decode_op) + data2 = data2.map(operations=decode_op, input_columns=["image"]) num_iter = 0 crop_and_resize_images = [] original_images = [] @@ -82,7 +82,7 @@ def test_random_crop_and_resize_op_py(plot=False): py_vision.ToTensor() ] transform1 = mindspore.dataset.transforms.py_transforms.Compose(transforms1) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset # Second dataset for comparison data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -91,7 +91,7 @@ def test_random_crop_and_resize_op_py(plot=False): py_vision.ToTensor() ] transform2 = mindspore.dataset.transforms.py_transforms.Compose(transforms2) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) num_iter = 0 crop_and_resize_images = [] original_images = [] @@ -122,8 +122,8 @@ def test_random_crop_and_resize_01(): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (0.5, 0.5), (1, 1)) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_crop_and_resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_crop_and_resize_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -133,7 +133,7 @@ def test_random_crop_and_resize_01(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) filename1 = "random_crop_and_resize_01_c_result.npz" filename2 = "random_crop_and_resize_01_py_result.npz" @@ -158,8 +158,8 @@ def test_random_crop_and_resize_02(): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), interpolation=mode.Inter.NEAREST) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_crop_and_resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_crop_and_resize_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -169,7 +169,7 @@ def test_random_crop_and_resize_02(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = 
data2.map(operations=transform, input_columns=["image"]) filename1 = "random_crop_and_resize_02_c_result.npz" filename2 = "random_crop_and_resize_02_py_result.npz" @@ -193,8 +193,8 @@ def test_random_crop_and_resize_03(): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), max_attempts=1) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_crop_and_resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_crop_and_resize_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -204,7 +204,7 @@ def test_random_crop_and_resize_03(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) filename1 = "random_crop_and_resize_03_c_result.npz" filename2 = "random_crop_and_resize_03_py_result.npz" @@ -229,8 +229,8 @@ def test_random_crop_and_resize_04_c(): try: # If input range of scale is not in the order of (min, max), ValueError will be raised. random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (1, 0.5), (0.5, 0.5)) - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_crop_and_resize_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_crop_and_resize_op, input_columns=["image"]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Input is not within the required interval of (0 to 16777216)." in str(e) @@ -253,7 +253,7 @@ def test_random_crop_and_resize_04_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Input is not within the required interval of (0 to 16777216)." in str(e) @@ -272,8 +272,8 @@ def test_random_crop_and_resize_05_c(): try: random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (1, 1), (1, 0.5)) # If input range of ratio is not in the order of (min, max), ValueError will be raised. - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_crop_and_resize_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_crop_and_resize_op, input_columns=["image"]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Input is not within the required interval of (0 to 16777216)." in str(e) @@ -296,7 +296,7 @@ def test_random_crop_and_resize_05_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Input is not within the required interval of (0 to 16777216)." 
in str(e) @@ -312,8 +312,8 @@ def test_random_crop_and_resize_comp(plot=False): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() random_crop_and_resize_op = c_vision.RandomResizedCrop(512, (1, 1), (0.5, 0.5)) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_crop_and_resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_crop_and_resize_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -323,7 +323,7 @@ def test_random_crop_and_resize_comp(plot=False): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) image_c_cropped = [] image_py_cropped = [] @@ -350,20 +350,21 @@ def test_random_crop_and_resize_06(): decode_op = c_vision.Decode() try: random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), scale="", ratio=(1, 0.5)) - data = data.map(input_columns=["image"], operations=decode_op) - data.map(input_columns=["image"], operations=random_crop_and_resize_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data.map(operations=random_crop_and_resize_op, input_columns=["image"]) except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Argument scale with value \"\" is not of type (,)" in str(e) try: random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), scale=(1, "2"), ratio=(1, 0.5)) - data = data.map(input_columns=["image"], operations=decode_op) - data.map(input_columns=["image"], operations=random_crop_and_resize_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data.map(operations=random_crop_and_resize_op, input_columns=["image"]) except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Argument scale[1] with value 2 is not of type (, )." 
in str(e) + if __name__ == "__main__": test_random_crop_and_resize_op_c(True) test_random_crop_and_resize_op_py(True) diff --git a/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py b/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py index a2ed419160..bb93a924f7 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py @@ -48,10 +48,9 @@ def test_random_resized_crop_with_bbox_op_c(plot_vis=False): test_op = c_vision.RandomResizedCropWithBBox((256, 512), (0.5, 0.5), (0.5, 0.5)) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) filename = "random_resized_crop_with_bbox_01_c_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -85,10 +84,9 @@ def test_random_resized_crop_with_bbox_op_coco_c(plot_vis=False): test_op = c_vision.RandomResizedCropWithBBox((512, 512), (0.5, 1), (0.5, 1)) - dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] @@ -114,16 +112,18 @@ def test_random_resized_crop_with_bbox_op_edge_c(plot_vis=False): test_op = c_vision.RandomResizedCropWithBBox((256, 512), (0.5, 0.5), (0.5, 0.5)) # maps to convert data into valid edge case data - dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))]) + dataVoc1 = dataVoc1.map( + operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))], + input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) # Test Op added to list of Operations here - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), test_op]) + dataVoc2 = dataVoc2.map( + operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), + test_op], input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] @@ -149,10 +149,9 @@ def test_random_resized_crop_with_bbox_op_invalid_c(): test_op = c_vision.RandomResizedCropWithBBox((256, 512), (1, 0.5), (0.5, 0.5)) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) for _ in dataVoc2.create_dict_iterator(num_epochs=1): break @@ -175,10 +174,9 @@ def test_random_resized_crop_with_bbox_op_invalid2_c(): test_op = c_vision.RandomResizedCropWithBBox((256, 512), (1, 1), (1, 0.5)) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", 
"bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) for _ in dataVoc2.create_dict_iterator(num_epochs=1): break diff --git a/tests/ut/python/dataset/test_random_crop_decode_resize.py b/tests/ut/python/dataset/test_random_crop_decode_resize.py index c2dc202c9c..ffda69630a 100644 --- a/tests/ut/python/dataset/test_random_crop_decode_resize.py +++ b/tests/ut/python/dataset/test_random_crop_decode_resize.py @@ -26,6 +26,7 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" GENERATE_GOLDEN = False + def test_random_crop_decode_resize_op(plot=False): """ Test RandomCropDecodeResize op @@ -36,14 +37,13 @@ def test_random_crop_decode_resize_op(plot=False): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = vision.Decode() random_crop_decode_resize_op = vision.RandomCropDecodeResize((256, 512), (1, 1), (0.5, 0.5)) - data1 = data1.map(input_columns=["image"], operations=random_crop_decode_resize_op) + data1 = data1.map(operations=random_crop_decode_resize_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) random_crop_resize_op = vision.RandomResizedCrop((256, 512), (1, 1), (0.5, 0.5)) - data2 = data2.map(input_columns=["image"], operations=decode_op) - data2 = data2.map(input_columns=["image"], operations=random_crop_resize_op) - + data2 = data2.map(operations=decode_op, input_columns=["image"]) + data2 = data2.map(operations=random_crop_resize_op, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -70,7 +70,7 @@ def test_random_crop_decode_resize_md5(): # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) random_crop_decode_resize_op = vision.RandomCropDecodeResize((256, 512), (1, 1), (0.5, 0.5)) - data = data.map(input_columns=["image"], operations=random_crop_decode_resize_op) + data = data.map(operations=random_crop_decode_resize_op, input_columns=["image"]) # Compare with expected md5 from images filename = "random_crop_decode_resize_01_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) diff --git a/tests/ut/python/dataset/test_random_crop_with_bbox.py b/tests/ut/python/dataset/test_random_crop_with_bbox.py index 9d42ae9354..3cdec02f16 100644 --- a/tests/ut/python/dataset/test_random_crop_with_bbox.py +++ b/tests/ut/python/dataset/test_random_crop_with_bbox.py @@ -46,10 +46,9 @@ def test_random_crop_with_bbox_op_c(plot_vis=False): test_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200]) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) # Add column for "bbox" + column_order=["image", "bbox"]) # Add column for "bbox" unaugSamp, augSamp = [], [] @@ -76,10 +75,9 @@ def test_random_crop_with_bbox_op_coco_c(plot_vis=False): test_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200]) - dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) unaugSamp, 
augSamp = [], [] @@ -108,10 +106,9 @@ def test_random_crop_with_bbox_op2_c(plot_vis=False): test_op = c_vision.RandomCropWithBBox(512, [200, 200, 200, 200], fill_value=(255, 255, 255)) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) filename = "random_crop_with_bbox_01_c_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -145,10 +142,9 @@ def test_random_crop_with_bbox_op3_c(plot_vis=False): test_op = c_vision.RandomCropWithBBox(512, [200, 200, 200, 200], padding_mode=mode.Border.EDGE) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] @@ -175,18 +171,18 @@ def test_random_crop_with_bbox_op_edge_c(plot_vis=False): test_op = c_vision.RandomCropWithBBox(512, [200, 200, 200, 200], padding_mode=mode.Border.EDGE) # maps to convert data into valid edge case data - dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[lambda img, bboxes: ( - img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))]) + dataVoc1 = dataVoc1.map( + operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))], + input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) # Test Op added to list of Operations here - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[lambda img, bboxes: ( - img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), test_op]) + dataVoc2 = dataVoc2.map( + operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), + test_op], input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] @@ -212,10 +208,9 @@ def test_random_crop_with_bbox_op_invalid_c(): test_op = c_vision.RandomCropWithBBox([512, 512, 375]) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) # Add column for "bbox" + column_order=["image", "bbox"]) # Add column for "bbox" for _ in dataVoc2.create_dict_iterator(num_epochs=1): break @@ -252,10 +247,9 @@ def test_random_crop_with_bbox_op_bad_padding(): try: test_op = c_vision.RandomCropWithBBox([512, 512], padding=-1) - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) for _ in dataVoc2.create_dict_iterator(num_epochs=1): break @@ -266,10 +260,9 @@ def test_random_crop_with_bbox_op_bad_padding(): try: test_op = c_vision.RandomCropWithBBox([512, 512], padding=[16777216, 16777216, 16777216, 16777216]) - dataVoc2 = 
dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) for _ in dataVoc2.create_dict_iterator(num_epochs=1): break diff --git a/tests/ut/python/dataset/test_random_erasing.py b/tests/ut/python/dataset/test_random_erasing.py index 5dc3efd553..3ea212e29a 100644 --- a/tests/ut/python/dataset/test_random_erasing.py +++ b/tests/ut/python/dataset/test_random_erasing.py @@ -29,6 +29,7 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" GENERATE_GOLDEN = False + def test_random_erasing_op(plot=False): """ Test RandomErasing and Cutout @@ -43,7 +44,7 @@ def test_random_erasing_op(plot=False): vision.RandomErasing(value='random') ] transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) - data1 = data1.map(input_columns=["image"], operations=transform_1) + data1 = data1.map(operations=transform_1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -53,7 +54,7 @@ def test_random_erasing_op(plot=False): vision.Cutout(80) ] transform_2 = mindspore.dataset.transforms.py_transforms.Compose(transforms_2) - data2 = data2.map(input_columns=["image"], operations=transform_2) + data2 = data2.map(operations=transform_2, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -88,7 +89,7 @@ def test_random_erasing_md5(): vision.RandomErasing(value='random') ] transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) - data = data.map(input_columns=["image"], operations=transform_1) + data = data.map(operations=transform_1, input_columns=["image"]) # Compare with expected md5 from images filename = "random_erasing_01_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) diff --git a/tests/ut/python/dataset/test_random_grayscale.py b/tests/ut/python/dataset/test_random_grayscale.py index ed97521063..30cf9bca41 100644 --- a/tests/ut/python/dataset/test_random_grayscale.py +++ b/tests/ut/python/dataset/test_random_grayscale.py @@ -29,6 +29,7 @@ GENERATE_GOLDEN = False DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" + def test_random_grayscale_valid_prob(plot=False): """ Test RandomGrayscale Op: valid input, expect to pass @@ -44,7 +45,7 @@ def test_random_grayscale_valid_prob(plot=False): py_vision.ToTensor() ] transform1 = mindspore.dataset.transforms.py_transforms.Compose(transforms1) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -53,7 +54,7 @@ def test_random_grayscale_valid_prob(plot=False): py_vision.ToTensor() ] transform2 = mindspore.dataset.transforms.py_transforms.Compose(transforms2) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) image_gray = [] image = [] @@ -65,6 +66,7 @@ def test_random_grayscale_valid_prob(plot=False): if plot: visualize_list(image, image_gray) + def test_random_grayscale_input_grayscale_images(): """ Test RandomGrayscale Op: valid parameter with 
grayscale images as input, expect to pass @@ -83,7 +85,7 @@ def test_random_grayscale_input_grayscale_images(): py_vision.ToTensor() ] transform1 = mindspore.dataset.transforms.py_transforms.Compose(transforms1) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -92,7 +94,7 @@ def test_random_grayscale_input_grayscale_images(): py_vision.ToTensor() ] transform2 = mindspore.dataset.transforms.py_transforms.Compose(transforms2) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) image_gray = [] image = [] @@ -111,6 +113,7 @@ def test_random_grayscale_input_grayscale_images(): ds.config.set_seed(original_seed) ds.config.set_num_parallel_workers(original_num_parallel_workers) + def test_random_grayscale_md5_valid_input(): """ Test RandomGrayscale with md5 comparison: valid parameter, expect to pass @@ -127,7 +130,7 @@ def test_random_grayscale_md5_valid_input(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) # Check output images with md5 comparison filename = "random_grayscale_01_result.npz" @@ -137,6 +140,7 @@ def test_random_grayscale_md5_valid_input(): ds.config.set_seed(original_seed) ds.config.set_num_parallel_workers(original_num_parallel_workers) + def test_random_grayscale_md5_no_param(): """ Test RandomGrayscale with md5 comparison: no parameter given, expect to pass @@ -153,7 +157,7 @@ def test_random_grayscale_md5_no_param(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) # Check output images with md5 comparison filename = "random_grayscale_02_result.npz" @@ -163,6 +167,7 @@ def test_random_grayscale_md5_no_param(): ds.config.set_seed(original_seed) ds.config.set_num_parallel_workers(original_num_parallel_workers) + def test_random_grayscale_invalid_param(): """ Test RandomGrayscale: invalid parameter given, expect to raise error @@ -178,11 +183,12 @@ def test_random_grayscale_invalid_param(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Input prob is not within the required interval of (0.0 to 1.0)." 
in str(e) + if __name__ == "__main__": test_random_grayscale_valid_prob(True) test_random_grayscale_input_grayscale_images() diff --git a/tests/ut/python/dataset/test_random_horizontal_flip.py b/tests/ut/python/dataset/test_random_horizontal_flip.py index a88e6765be..4e7163fb25 100644 --- a/tests/ut/python/dataset/test_random_horizontal_flip.py +++ b/tests/ut/python/dataset/test_random_horizontal_flip.py @@ -51,12 +51,12 @@ def test_random_horizontal_op(plot=False): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() random_horizontal_op = c_vision.RandomHorizontalFlip(1.0) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_horizontal_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_horizontal_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=decode_op) + data2 = data2.map(operations=decode_op, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -89,8 +89,8 @@ def test_random_horizontal_valid_prob_c(): data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() random_horizontal_op = c_vision.RandomHorizontalFlip(0.8) - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_horizontal_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_horizontal_op, input_columns=["image"]) filename = "random_horizontal_01_c_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -116,7 +116,7 @@ def test_random_horizontal_valid_prob_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) filename = "random_horizontal_01_py_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -138,8 +138,8 @@ def test_random_horizontal_invalid_prob_c(): try: # Note: Valid range of prob should be [0.0, 1.0] random_horizontal_op = c_vision.RandomHorizontalFlip(1.5) - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_horizontal_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_horizontal_op, input_columns=["image"]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Input prob is not within the required interval of (0.0 to 1.0)." in str(e) @@ -162,7 +162,7 @@ def test_random_horizontal_invalid_prob_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Input prob is not within the required interval of (0.0 to 1.0)." 
in str(e) @@ -178,8 +178,8 @@ def test_random_horizontal_comp(plot=False): decode_op = c_vision.Decode() # Note: The image must be flipped if prob is set to be 1 random_horizontal_op = c_vision.RandomHorizontalFlip(1) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_horizontal_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_horizontal_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -190,7 +190,7 @@ def test_random_horizontal_comp(plot=False): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) images_list_c = [] images_list_py = [] diff --git a/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py b/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py index 58e67787eb..22c1eed661 100644 --- a/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py +++ b/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py @@ -43,10 +43,9 @@ def test_random_horizontal_flip_with_bbox_op_c(plot_vis=False): test_op = c_vision.RandomHorizontalFlipWithBBox(1) - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] @@ -73,10 +72,9 @@ def test_random_horizontal_flip_with_bbox_op_coco_c(plot_vis=False): test_op = c_vision.RandomHorizontalFlipWithBBox(1) - dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] @@ -107,10 +105,9 @@ def test_random_horizontal_flip_with_bbox_valid_rand_c(plot_vis=False): test_op = c_vision.RandomHorizontalFlipWithBBox(0.6) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) filename = "random_horizontal_flip_with_bbox_01_c_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -143,20 +140,19 @@ def test_random_horizontal_flip_with_bbox_valid_edge_c(plot_vis=False): # map to apply ops # Add column for "bbox" - dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"], + dataVoc1 = dataVoc1.map( + operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)), + input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) + dataVoc2 = dataVoc2.map( + operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)), + input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - 
operations=lambda img, bbox: - (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=lambda img, bbox: - (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] @@ -180,10 +176,9 @@ def test_random_horizontal_flip_with_bbox_invalid_prob_c(): # Note: Valid range of prob should be [0.0, 1.0] test_op = c_vision.RandomHorizontalFlipWithBBox(1.5) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) # Add column for "bbox" + column_order=["image", "bbox"]) # Add column for "bbox" except ValueError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "Input prob is not within the required interval of (0.0 to 1.0)." in str(error) diff --git a/tests/ut/python/dataset/test_random_order.py b/tests/ut/python/dataset/test_random_order.py index c882d822cc..74bd5958cc 100644 --- a/tests/ut/python/dataset/test_random_order.py +++ b/tests/ut/python/dataset/test_random_order.py @@ -51,10 +51,10 @@ def test_random_order_op(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) image_order = [] image_original = [] @@ -85,7 +85,7 @@ def test_random_order_md5(): # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) # check results with md5 comparison filename = "random_order_01_result.npz" diff --git a/tests/ut/python/dataset/test_random_perspective.py b/tests/ut/python/dataset/test_random_perspective.py index 94d888a784..94e00783a8 100644 --- a/tests/ut/python/dataset/test_random_perspective.py +++ b/tests/ut/python/dataset/test_random_perspective.py @@ -52,10 +52,10 @@ def test_random_perspective_op(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) image_perspective = [] image_original = [] @@ -88,7 +88,7 @@ def skip_test_random_perspective_md5(): # Generate dataset data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data = data.map(input_columns=["image"], operations=transform) + data = 
data.map(operations=transform, input_columns=["image"]) # check results with md5 comparison filename = "random_perspective_01_result.npz" diff --git a/tests/ut/python/dataset/test_random_posterize.py b/tests/ut/python/dataset/test_random_posterize.py index 2d79b0bc1e..de910439c0 100644 --- a/tests/ut/python/dataset/test_random_posterize.py +++ b/tests/ut/python/dataset/test_random_posterize.py @@ -46,10 +46,10 @@ def test_random_posterize_op_c(plot=False, run_golden=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transforms1) + data1 = data1.map(operations=transforms1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode()]) + data2 = data2.map(operations=[c_vision.Decode()], input_columns=["image"]) image_posterize = [] image_original = [] @@ -92,10 +92,10 @@ def test_random_posterize_op_fixed_point_c(plot=False, run_golden=True): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transforms1) + data1 = data1.map(operations=transforms1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode()]) + data2 = data2.map(operations=[c_vision.Decode()], input_columns=["image"]) image_posterize = [] image_original = [] @@ -129,10 +129,10 @@ def test_random_posterize_default_c_md5(plot=False, run_golden=True): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data1 = data1.map(input_columns=["image"], operations=transforms1) + data1 = data1.map(operations=transforms1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode()]) + data2 = data2.map(operations=[c_vision.Decode()], input_columns=["image"]) image_posterize = [] image_original = [] @@ -202,10 +202,10 @@ def test_rescale_with_random_posterize(): dataset = ds.Cifar10Dataset(DATA_DIR_10) rescale_op = c_vision.Rescale((1.0 / 255.0), 0.0) - dataset = dataset.map(input_columns=["image"], operations=rescale_op) + dataset = dataset.map(operations=rescale_op, input_columns=["image"]) random_posterize_op = c_vision.RandomPosterize((4, 8)) - dataset = dataset.map(input_columns=["image"], operations=random_posterize_op, num_parallel_workers=1) + dataset = dataset.map(operations=random_posterize_op, input_columns=["image"], num_parallel_workers=1) try: _ = dataset.output_shapes() diff --git a/tests/ut/python/dataset/test_random_resize.py b/tests/ut/python/dataset/test_random_resize.py index d3ffefd67c..2db2edc167 100644 --- a/tests/ut/python/dataset/test_random_resize.py +++ b/tests/ut/python/dataset/test_random_resize.py @@ -38,9 +38,9 @@ def test_random_resize_op(plot=False): resize_op = vision.RandomResize(10) # apply map operations on images - data1 = data1.map(input_columns=["image"], operations=decode_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) - data2 = data1.map(input_columns=["image"], operations=resize_op) + data2 = data1.map(operations=resize_op, input_columns=["image"]) image_original = [] 
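[Editor's note] The hunks above and below all make the same mechanical change: operations moves ahead of input_columns in every Dataset.map() call. Below is a minimal sketch of the convention the patch standardizes on; the import path and alias are assumed from this era of the API, and the dataset paths are placeholders, not values from this patch.

    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as vision

    data = ds.TFRecordDataset("/path/to/tfrecord", "/path/to/schema.json",
                              columns_list=["image"], shuffle=False)
    # Before this patch: data.map(input_columns=["image"], operations=vision.Decode())
    # After this patch, operations always comes first:
    data = data.map(operations=vision.Decode(), input_columns=["image"])
    data = data.map(operations=vision.RandomResize(10), input_columns=["image"])
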
image_resized = [] num_iter = 0 @@ -66,8 +66,8 @@ def test_random_resize_md5(): data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = vision.Decode() resize_op = vision.RandomResize(10) - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=resize_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=resize_op, input_columns=["image"]) # Compare with expected md5 from images filename = "random_resize_01_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) diff --git a/tests/ut/python/dataset/test_random_resize_with_bbox.py b/tests/ut/python/dataset/test_random_resize_with_bbox.py index d698216ea2..f36b6e45ce 100644 --- a/tests/ut/python/dataset/test_random_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_random_resize_with_bbox.py @@ -46,10 +46,9 @@ def test_random_resize_with_bbox_op_voc_c(plot_vis=False): test_op = c_vision.RandomResizeWithBBox(100) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) filename = "random_resize_with_bbox_op_01_c_voc_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -89,10 +88,9 @@ def test_random_resize_with_bbox_op_rand_coco_c(plot_vis=False): # map to apply ops - dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) filename = "random_resize_with_bbox_op_01_c_coco_result.npz" save_and_check_md5(dataCoco2, filename, generate_golden=GENERATE_GOLDEN) @@ -125,17 +123,17 @@ def test_random_resize_with_bbox_op_edge_c(plot_vis=False): test_op = c_vision.RandomResizeWithBBox(500) # maps to convert data into valid edge case data - dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[lambda img, bboxes: ( - img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))]) - - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[lambda img, bboxes: ( - img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), test_op]) + dataVoc1 = dataVoc1.map( + operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))], + input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) + + dataVoc2 = dataVoc2.map( + operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), + test_op], input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] diff --git a/tests/ut/python/dataset/test_random_rotation.py b/tests/ut/python/dataset/test_random_rotation.py index 4eede355ec..e8aec50ed3 100644 --- a/tests/ut/python/dataset/test_random_rotation.py +++ b/tests/ut/python/dataset/test_random_rotation.py @@ -44,12 +44,12 @@ def test_random_rotation_op_c(plot=False): decode_op = c_vision.Decode() # use [90, 90] to force rotate 90 
degrees, expand is set to be True to match output size random_rotation_op = c_vision.RandomRotation((90, 90), expand=True) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_rotation_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_rotation_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=decode_op) + data2 = data2.map(operations=decode_op, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -79,13 +79,13 @@ def test_random_rotation_op_py(plot=False): transform1 = mindspore.dataset.transforms.py_transforms.Compose([py_vision.Decode(), py_vision.RandomRotation((90, 90), expand=True), py_vision.ToTensor()]) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) transform2 = mindspore.dataset.transforms.py_transforms.Compose([py_vision.Decode(), py_vision.ToTensor()]) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -114,8 +114,8 @@ def test_random_rotation_expand(): decode_op = c_vision.Decode() # expand is set to be True to match output size random_rotation_op = c_vision.RandomRotation((0, 90), expand=True) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_rotation_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_rotation_op, input_columns=["image"]) num_iter = 0 for item in data1.create_dict_iterator(num_epochs=1): @@ -140,8 +140,8 @@ def test_random_rotation_md5(): resample=Inter.BILINEAR, center=(50, 50), fill_value=150) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=resize_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) @@ -152,7 +152,7 @@ def test_random_rotation_md5(): center=(50, 50), fill_value=150), py_vision.ToTensor()]) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) # Compare with expected md5 from images filename1 = "random_rotation_01_c_result.npz" @@ -180,7 +180,7 @@ def test_rotation_diff(plot=False): rotation_op ] - data1 = data1.map(input_columns=["image"], operations=ctrans) + data1 = data1.map(operations=ctrans, input_columns=["image"]) # Second dataset transforms = [ @@ -190,7 +190,7 @@ def test_rotation_diff(plot=False): ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) num_iter = 0 image_list_c, 
image_list_py = [], [] diff --git a/tests/ut/python/dataset/test_random_select_subpolicy.py b/tests/ut/python/dataset/test_random_select_subpolicy.py index 8226ee6663..f08bbacac5 100644 --- a/tests/ut/python/dataset/test_random_select_subpolicy.py +++ b/tests/ut/python/dataset/test_random_select_subpolicy.py @@ -24,7 +24,7 @@ def test_random_select_subpolicy(): def test_config(arr, policy): try: data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) - data = data.map(input_columns=["col"], operations=visions.RandomSelectSubpolicy(policy)) + data = data.map(operations=visions.RandomSelectSubpolicy(policy), input_columns=["col"]) res = [] for i in data.create_dict_iterator(num_epochs=1): res.append(i["col"].tolist()) diff --git a/tests/ut/python/dataset/test_random_sharpness.py b/tests/ut/python/dataset/test_random_sharpness.py index 838d23aaaf..5191d8538e 100644 --- a/tests/ut/python/dataset/test_random_sharpness.py +++ b/tests/ut/python/dataset/test_random_sharpness.py @@ -44,8 +44,7 @@ def test_random_sharpness_py(degrees=(0.7, 0.7), plot=False): F.Resize((224, 224)), F.ToTensor()]) - ds_original = data.map(input_columns="image", - operations=transforms_original) + ds_original = data.map(operations=transforms_original, input_columns="image") ds_original = ds_original.batch(512) @@ -69,8 +68,7 @@ def test_random_sharpness_py(degrees=(0.7, 0.7), plot=False): py_op, F.ToTensor()]) - ds_random_sharpness = data.map(input_columns="image", - operations=transforms_random_sharpness) + ds_random_sharpness = data.map(operations=transforms_random_sharpness, input_columns="image") ds_random_sharpness = ds_random_sharpness.batch(512) @@ -111,7 +109,7 @@ def test_random_sharpness_py_md5(): # Generate dataset data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) # check results with md5 comparison filename = "random_sharpness_py_01_result.npz" @@ -135,8 +133,7 @@ def test_random_sharpness_c(degrees=(1.6, 1.6), plot=False): transforms_original = [C.Decode(), C.Resize((224, 224))] - ds_original = data.map(input_columns="image", - operations=transforms_original) + ds_original = data.map(operations=transforms_original, input_columns="image") ds_original = ds_original.batch(512) @@ -159,8 +156,7 @@ def test_random_sharpness_c(degrees=(1.6, 1.6), plot=False): C.Resize((224, 224)), c_op] - ds_random_sharpness = data.map(input_columns="image", - operations=transforms_random_sharpness) + ds_random_sharpness = data.map(operations=transforms_random_sharpness, input_columns="image") ds_random_sharpness = ds_random_sharpness.batch(512) @@ -199,7 +195,7 @@ def test_random_sharpness_c_md5(): # Generate dataset data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - data = data.map(input_columns=["image"], operations=transforms) + data = data.map(operations=transforms, input_columns=["image"]) # check results with md5 comparison filename = "random_sharpness_cpp_01_result.npz" @@ -218,9 +214,7 @@ def test_random_sharpness_c_py(degrees=(1.0, 1.0), plot=False): # RandomSharpness Images data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - data = data.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((200, 300))]) + data = data.map(operations=[C.Decode(), C.Resize((200, 300))], input_columns=["image"]) python_op = F.RandomSharpness(degrees) c_op = C.RandomSharpness(degrees) @@ -229,8 +223,7 @@ def 
test_random_sharpness_c_py(degrees=(1.0, 1.0), plot=False): python_op, np.array]) - ds_random_sharpness_py = data.map(input_columns="image", - operations=transforms_op) + ds_random_sharpness_py = data.map(operations=transforms_op, input_columns="image") ds_random_sharpness_py = ds_random_sharpness_py.batch(512) @@ -244,12 +237,9 @@ def test_random_sharpness_c_py(degrees=(1.0, 1.0), plot=False): axis=0) data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - data = data.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((200, 300))]) + data = data.map(operations=[C.Decode(), C.Resize((200, 300))], input_columns=["image"]) - ds_images_random_sharpness_c = data.map(input_columns="image", - operations=c_op) + ds_images_random_sharpness_c = data.map(operations=c_op, input_columns="image") ds_images_random_sharpness_c = ds_images_random_sharpness_c.batch(512) @@ -282,7 +272,7 @@ def test_random_sharpness_one_channel_c(degrees=(1.4, 1.4), plot=False): c_op = C.RandomSharpness(degrees) # RandomSharpness Images data = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) - ds_random_sharpness_c = data.map(input_columns="image", operations=c_op) + ds_random_sharpness_c = data.map(operations=c_op, input_columns="image") # Original images data = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) @@ -307,30 +297,24 @@ def test_random_sharpness_invalid_params(): logger.info("Test RandomSharpness with invalid input parameters.") try: data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - data = data.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((224, 224)), - C.RandomSharpness(10)]) + data = data.map(operations=[C.Decode(), C.Resize((224, 224)), + C.RandomSharpness(10)], input_columns=["image"]) except TypeError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "tuple" in str(error) try: data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - data = data.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((224, 224)), - C.RandomSharpness((-10, 10))]) + data = data.map(operations=[C.Decode(), C.Resize((224, 224)), + C.RandomSharpness((-10, 10))], input_columns=["image"]) except ValueError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "interval" in str(error) try: data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - data = data.map(input_columns=["image"], - operations=[C.Decode(), - C.Resize((224, 224)), - C.RandomSharpness((10, 5))]) + data = data.map(operations=[C.Decode(), C.Resize((224, 224)), + C.RandomSharpness((10, 5))], input_columns=["image"]) except ValueError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert "(min,max)" in str(error) @@ -339,11 +323,13 @@ def test_random_sharpness_invalid_params(): if __name__ == "__main__": test_random_sharpness_py(plot=True) test_random_sharpness_py(None, plot=True) # Test with default values - test_random_sharpness_py(degrees=(20.0, 25.0), plot=True) # Test with degree values that show more obvious transformation + test_random_sharpness_py(degrees=(20.0, 25.0), + plot=True) # Test with degree values that show more obvious transformation test_random_sharpness_py_md5() test_random_sharpness_c(plot=True) test_random_sharpness_c(None, plot=True) # test with default values - test_random_sharpness_c(degrees=[10, 15], plot=True) # Test with degrees values that show more obvious transformation + test_random_sharpness_c(degrees=[10, 15], + 
plot=True) # Test with degrees values that show more obvious transformation test_random_sharpness_c_md5() test_random_sharpness_c_py(degrees=[1.5, 1.5], plot=True) test_random_sharpness_c_py(degrees=[1, 1], plot=True) diff --git a/tests/ut/python/dataset/test_random_solarize_op.py b/tests/ut/python/dataset/test_random_solarize_op.py index 09a0a92c00..18ea4a13c7 100644 --- a/tests/ut/python/dataset/test_random_solarize_op.py +++ b/tests/ut/python/dataset/test_random_solarize_op.py @@ -48,12 +48,12 @@ def test_random_solarize_op(threshold=(10, 150), plot=False, run_golden=True): else: solarize_op = vision.RandomSolarize(threshold) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=solarize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=solarize_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=decode_op) + data2 = data2.map(operations=decode_op, input_columns=["image"]) if run_golden: filename = "random_solarize_01_result.npz" @@ -79,7 +79,7 @@ def test_random_solarize_mnist(plot=False, run_golden=True): mnist_1 = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) mnist_2 = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) - mnist_2 = mnist_2.map(input_columns="image", operations=vision.RandomSolarize((0, 255))) + mnist_2 = mnist_2.map(operations=vision.RandomSolarize((0, 255)), input_columns="image") images = [] images_trans = [] diff --git a/tests/ut/python/dataset/test_random_vertical_flip.py b/tests/ut/python/dataset/test_random_vertical_flip.py index 004b19fcd6..27db8888a3 100644 --- a/tests/ut/python/dataset/test_random_vertical_flip.py +++ b/tests/ut/python/dataset/test_random_vertical_flip.py @@ -51,12 +51,12 @@ def test_random_vertical_op(plot=False): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() random_vertical_op = c_vision.RandomVerticalFlip(1.0) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_vertical_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_vertical_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=decode_op) + data2 = data2.map(operations=decode_op, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -89,8 +89,8 @@ def test_random_vertical_valid_prob_c(): data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() random_horizontal_op = c_vision.RandomVerticalFlip(0.8) - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_horizontal_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_horizontal_op, input_columns=["image"]) filename = "random_vertical_01_c_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -116,7 +116,7 @@ def test_random_vertical_valid_prob_py(): py_vision.ToTensor() ] transform = 
mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) filename = "random_vertical_01_py_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) @@ -138,8 +138,8 @@ def test_random_vertical_invalid_prob_c(): try: # Note: Valid range of prob should be [0.0, 1.0] random_horizontal_op = c_vision.RandomVerticalFlip(1.5) - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=random_horizontal_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=random_horizontal_op, input_columns=["image"]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert 'Input prob is not within the required interval of (0.0 to 1.0).' in str(e) @@ -161,7 +161,7 @@ def test_random_vertical_invalid_prob_py(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert 'Input prob is not within the required interval of (0.0 to 1.0).' in str(e) @@ -178,8 +178,8 @@ def test_random_vertical_comp(plot=False): decode_op = c_vision.Decode() # Note: The image must be flipped if prob is set to be 1 random_horizontal_op = c_vision.RandomVerticalFlip(1) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_horizontal_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=random_horizontal_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -190,7 +190,7 @@ def test_random_vertical_comp(plot=False): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) images_list_c = [] images_list_py = [] diff --git a/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py b/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py index 84af16e548..f226a8dabe 100644 --- a/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py +++ b/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py @@ -44,10 +44,9 @@ def test_random_vertical_flip_with_bbox_op_c(plot_vis=False): test_op = c_vision.RandomVerticalFlipWithBBox(1) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] @@ -58,6 +57,7 @@ def test_random_vertical_flip_with_bbox_op_c(plot_vis=False): if plot_vis: visualize_with_bounding_boxes(unaugSamp, augSamp) + def test_random_vertical_flip_with_bbox_op_coco_c(plot_vis=False): """ Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied, @@ -73,10 +73,9 @@ def test_random_vertical_flip_with_bbox_op_coco_c(plot_vis=False): test_op = c_vision.RandomVerticalFlipWithBBox(1) - dataCoco2 = 
dataCoco2.map(input_columns=["image", "bbox"], + dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) test_op = c_vision.RandomVerticalFlipWithBBox(1) @@ -107,10 +106,9 @@ def test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=False): test_op = c_vision.RandomVerticalFlipWithBBox(0.8) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) filename = "random_vertical_flip_with_bbox_01_c_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -142,16 +140,18 @@ def test_random_vertical_flip_with_bbox_op_edge_c(plot_vis=False): test_op = c_vision.RandomVerticalFlipWithBBox(1) # maps to convert data into valid edge case data - dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))]) + dataVoc1 = dataVoc1.map( + operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))], + input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) # Test Op added to list of Operations here - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), test_op]) + dataVoc2 = dataVoc2.map( + operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), + test_op], input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] @@ -174,10 +174,9 @@ def test_random_vertical_flip_with_bbox_op_invalid_c(): test_op = c_vision.RandomVerticalFlipWithBBox(2) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) for _ in dataVoc2.create_dict_iterator(num_epochs=1): break diff --git a/tests/ut/python/dataset/test_repeat.py b/tests/ut/python/dataset/test_repeat.py index 3bf065361a..ee21fb831c 100644 --- a/tests/ut/python/dataset/test_repeat.py +++ b/tests/ut/python/dataset/test_repeat.py @@ -78,8 +78,8 @@ def test_tf_repeat_03(): resize_height, resize_width = 32, 32 decode_op = vision.Decode() resize_op = vision.Resize((resize_height, resize_width), interpolation=ds.transforms.vision.Inter.LINEAR) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=resize_op, input_columns=["image"]) data1 = data1.repeat(22) data1 = data1.batch(batch_size, drop_remainder=True) @@ -251,6 +251,7 @@ def test_nested_repeat11(): assert sum([1 for _ in data]) == 2 * 3 * 4 * 5 * 3 + def test_repeat_count1(): data1 = ds.TFRecordDataset(DATA_DIR_TF2, SCHEMA_DIR_TF2, shuffle=False) 
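[Editor's note] The *WithBBox hunks above also show the multi-column form of map(): a single call consumes both the "image" and "bbox" columns, so every operation in the list (including the edge-case lambda) receives two arguments and returns a two-element tuple matching output_columns. A condensed, hypothetical sketch follows; the synthetic NumpySlicesDataset input and the [x, y, w, h] float32 box layout are assumptions, not taken from this patch.

    import numpy as np
    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as c_vision

    # Synthetic one-sample dataset: an HWC image column and a bbox column.
    images = [np.zeros((64, 64, 3), dtype=np.uint8)]
    bboxes = [np.array([[10, 10, 20, 20]], dtype=np.float32)]
    data = ds.NumpySlicesDataset({"image": images, "bbox": bboxes}, shuffle=False)

    test_op = c_vision.RandomVerticalFlipWithBBox(1)
    # First replace each box with one full-image box (the tests' edge case),
    # then flip; operations inside one map() run left to right.
    data = data.map(
        operations=[lambda img, box: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(box.dtype)),
                    test_op],
        input_columns=["image", "bbox"],
        output_columns=["image", "bbox"],
        column_order=["image", "bbox"])
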
data1_size = data1.get_dataset_size() @@ -260,8 +261,8 @@ def test_repeat_count1(): resize_height, resize_width = 32, 32 decode_op = vision.Decode() resize_op = vision.Resize((resize_height, resize_width), interpolation=ds.transforms.vision.Inter.LINEAR) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=resize_op, input_columns=["image"]) data1 = data1.repeat(repeat_count) data1 = data1.batch(batch_size, drop_remainder=False) dataset_size = data1.get_dataset_size() @@ -273,6 +274,7 @@ def test_repeat_count1(): assert data1_size == 3 assert dataset_size == num1_iter == 6 + def test_repeat_count2(): data1 = ds.TFRecordDataset(DATA_DIR_TF2, SCHEMA_DIR_TF2, shuffle=False) data1_size = data1.get_dataset_size() @@ -282,8 +284,8 @@ def test_repeat_count2(): resize_height, resize_width = 32, 32 decode_op = vision.Decode() resize_op = vision.Resize((resize_height, resize_width), interpolation=ds.transforms.vision.Inter.LINEAR) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=resize_op, input_columns=["image"]) data1 = data1.batch(batch_size, drop_remainder=False) data1 = data1.repeat(repeat_count) dataset_size = data1.get_dataset_size() @@ -295,6 +297,7 @@ def test_repeat_count2(): assert data1_size == 3 assert dataset_size == num1_iter == 8 + def test_repeat_count0(): """ Test Repeat with invalid count 0. @@ -305,6 +308,7 @@ def test_repeat_count0(): data1.repeat(0) assert "count" in str(info.value) + def test_repeat_countneg2(): """ Test Repeat with invalid count -2. 
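[Editor's note] test_repeat_count1 and test_repeat_count2 differ only in whether repeat() runs before or after batch(), and they assert different sizes (6 vs 8) for the same 3-row source. The concrete repeat_count and batch_size are not visible in these hunks; the sketch below uses repeat(4) and batch(2, drop_remainder=False) as an assumption that reproduces both numbers: repeat-then-batch gives ceil(3*4/2) = 6 batches, while batch-then-repeat gives ceil(3/2)*4 = 8.

    import mindspore.dataset as ds

    rows = [1, 2, 3]
    data1 = ds.NumpySlicesDataset(rows, column_names=["col"], shuffle=False)
    data1 = data1.repeat(4).batch(2, drop_remainder=False)
    assert data1.get_dataset_size() == 6   # ceil(12 / 2)

    data2 = ds.NumpySlicesDataset(rows, column_names=["col"], shuffle=False)
    data2 = data2.batch(2, drop_remainder=False).repeat(4)
    assert data2.get_dataset_size() == 8   # ceil(3 / 2) * 4
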
@@ -315,6 +319,7 @@ def test_repeat_countneg2(): data1.repeat(-2) assert "count" in str(info.value) + if __name__ == "__main__": test_tf_repeat_01() test_tf_repeat_02() diff --git a/tests/ut/python/dataset/test_rescale_op.py b/tests/ut/python/dataset/test_rescale_op.py index a20f296455..64633ad214 100644 --- a/tests/ut/python/dataset/test_rescale_op.py +++ b/tests/ut/python/dataset/test_rescale_op.py @@ -25,6 +25,7 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" GENERATE_GOLDEN = False + def rescale_np(image): """ Apply the rescale @@ -40,7 +41,7 @@ def get_rescaled(image_id): """ data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = vision.Decode() - data1 = data1.map(input_columns=["image"], operations=decode_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) num_iter = 0 for item in data1.create_dict_iterator(num_epochs=1): image = item["image"] @@ -63,9 +64,9 @@ def test_rescale_op(plot=False): rescale_op = vision.Rescale(1.0 / 255.0, -1.0) # apply map operations on images - data1 = data1.map(input_columns=["image"], operations=decode_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) - data2 = data1.map(input_columns=["image"], operations=rescale_op) + data2 = data1.map(operations=rescale_op, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -73,7 +74,7 @@ def test_rescale_op(plot=False): image_de_rescaled = item2["image"] image_np_rescaled = get_rescaled(num_iter) mse = diff_mse(image_de_rescaled, image_np_rescaled) - assert mse < 0.001 # rounding error + assert mse < 0.001 # rounding error logger.info("image_{}, mse: {}".format(num_iter + 1, mse)) num_iter += 1 if plot: @@ -92,8 +93,8 @@ def test_rescale_md5(): rescale_op = vision.Rescale(1.0 / 255.0, -1.0) # apply map operations on images - data = data.map(input_columns=["image"], operations=decode_op) - data = data.map(input_columns=["image"], operations=rescale_op) + data = data.map(operations=decode_op, input_columns=["image"]) + data = data.map(operations=rescale_op, input_columns=["image"]) # check results with md5 comparison filename = "rescale_01_result.npz" diff --git a/tests/ut/python/dataset/test_resize.py b/tests/ut/python/dataset/test_resize.py index 1402c6f95e..03ec60731e 100644 --- a/tests/ut/python/dataset/test_resize.py +++ b/tests/ut/python/dataset/test_resize.py @@ -42,9 +42,9 @@ def test_resize_op(plot=False): resize_op = vision.Resize(size) # apply map operations on images - data1 = data1.map(input_columns=["image"], operations=decode_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) - data2 = data1.map(input_columns=["image"], operations=resize_op) + data2 = data1.map(operations=resize_op, input_columns=["image"]) image_original = [] image_resized = [] for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -72,8 +72,8 @@ def test_resize_md5(plot=False): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = vision.Decode() resize_op = vision.Resize(size) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data2 = data1.map(input_columns=["image"], operations=resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data2 = data1.map(operations=resize_op, input_columns=["image"]) image_original = [] image_resized = [] # Compare with expected 
md5 from images diff --git a/tests/ut/python/dataset/test_resize_with_bbox.py b/tests/ut/python/dataset/test_resize_with_bbox.py index 566a1f01f9..74d1340e06 100644 --- a/tests/ut/python/dataset/test_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_resize_with_bbox.py @@ -46,10 +46,9 @@ def test_resize_with_bbox_op_voc_c(plot_vis=False): test_op = c_vision.ResizeWithBBox(100) # map to apply ops - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], + dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) filename = "resize_with_bbox_op_01_c_voc_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -83,10 +82,9 @@ def test_resize_with_bbox_op_coco_c(plot_vis=False): # map to apply ops - dataCOCO2 = dataCOCO2.map(input_columns=["image", "bbox"], + dataCOCO2 = dataCOCO2.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) + column_order=["image", "bbox"]) filename = "resize_with_bbox_op_01_c_coco_result.npz" save_and_check_md5(dataCOCO2, filename, generate_golden=GENERATE_GOLDEN) @@ -115,17 +113,17 @@ def test_resize_with_bbox_op_edge_c(plot_vis=False): test_op = c_vision.ResizeWithBBox(500) # maps to convert data into valid edge case data - dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[lambda img, bboxes: ( - img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))]) - - dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], - output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[lambda img, bboxes: ( - img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), test_op]) + dataVoc1 = dataVoc1.map( + operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))], + input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) + + dataVoc2 = dataVoc2.map( + operations=[lambda img, bboxes: (img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), + test_op], input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + column_order=["image", "bbox"]) unaugSamp, augSamp = [], [] diff --git a/tests/ut/python/dataset/test_rgb_hsv.py b/tests/ut/python/dataset/test_rgb_hsv.py index 0bfc35be02..4f5672a652 100644 --- a/tests/ut/python/dataset/test_rgb_hsv.py +++ b/tests/ut/python/dataset/test_rgb_hsv.py @@ -140,7 +140,7 @@ def test_rgb_hsv_pipeline(): ] transforms1 = mindspore.dataset.transforms.py_transforms.Compose(transforms1) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - ds1 = ds1.map(input_columns=["image"], operations=transforms1) + ds1 = ds1.map(operations=transforms1, input_columns=["image"]) # Second dataset transforms2 = [ @@ -152,7 +152,7 @@ def test_rgb_hsv_pipeline(): ] transform2 = mindspore.dataset.transforms.py_transforms.Compose(transforms2) ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - ds2 = ds2.map(input_columns=["image"], operations=transform2) + ds2 = ds2.map(operations=transform2, input_columns=["image"]) num_iter = 0 for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): diff --git 
a/tests/ut/python/dataset/test_serdes_dataset.py b/tests/ut/python/dataset/test_serdes_dataset.py index 3e0b6336f1..718635e666 100644 --- a/tests/ut/python/dataset/test_serdes_dataset.py +++ b/tests/ut/python/dataset/test_serdes_dataset.py @@ -31,8 +31,6 @@ from mindspore import log as logger from mindspore.dataset.vision import Inter - - def test_imagefolder(remove_json_files=True): """ Test simulating resnet50 dataset pipeline. @@ -50,11 +48,11 @@ def test_imagefolder(remove_json_files=True): sampler = ds.WeightedRandomSampler(weights, 11) data1 = ds.ImageFolderDataset(data_dir, sampler=sampler) data1 = data1.repeat(1) - data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)]) + data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"]) rescale_op = vision.Rescale(rescale, shift) resize_op = vision.Resize((resize_height, resize_width), Inter.LINEAR) - data1 = data1.map(input_columns=["image"], operations=[rescale_op, resize_op]) + data1 = data1.map(operations=[rescale_op, resize_op], input_columns=["image"]) data1 = data1.batch(2) # Serialize the dataset pre-processing pipeline. @@ -106,7 +104,7 @@ def test_mnist_dataset(remove_json_files=True): data1 = ds.MnistDataset(data_dir, num_samples=100) one_hot_encode = c.OneHot(10) # num_classes is input argument - data1 = data1.map(input_columns="label", operations=one_hot_encode) + data1 = data1.map(operations=one_hot_encode, input_columns="label") # batch_size is input argument data1 = data1.batch(batch_size=10, drop_remainder=True) @@ -187,8 +185,8 @@ def test_random_crop(): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"]) decode_op = vision.Decode() random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200]) - data1 = data1.map(input_columns="image", operations=decode_op) - data1 = data1.map(input_columns="image", operations=random_crop_op) + data1 = data1.map(operations=decode_op, input_columns="image") + data1 = data1.map(operations=random_crop_op, input_columns="image") # Serializing into python dictionary ds1_dict = ds.serialize(data1) @@ -200,7 +198,7 @@ def test_random_crop(): # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"]) - data2 = data2.map(input_columns="image", operations=decode_op) + data2 = data2.map(operations=decode_op, input_columns="image") for item1, item1_1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data1_1.create_dict_iterator(num_epochs=1), diff --git a/tests/ut/python/dataset/test_skip.py b/tests/ut/python/dataset/test_skip.py index 98fefed67e..55b13160c2 100644 --- a/tests/ut/python/dataset/test_skip.py +++ b/tests/ut/python/dataset/test_skip.py @@ -33,8 +33,8 @@ def test_tf_skip(): resize_height, resize_width = 32, 32 decode_op = vision.Decode() resize_op = vision.Resize((resize_height, resize_width), interpolation=ds.transforms.vision.Inter.LINEAR) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=resize_op) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=resize_op, input_columns=["image"]) data1 = data1.skip(2) num_iter = 0 diff --git a/tests/ut/python/dataset/test_sliding_window.py b/tests/ut/python/dataset/test_sliding_window.py index ab4eeeacdd..8588f6dec6 100644 --- a/tests/ut/python/dataset/test_sliding_window.py +++ b/tests/ut/python/dataset/test_sliding_window.py @@ -19,13 +19,14 @@ import numpy as np import mindspore.dataset as ds import mindspore.dataset.text 
as text + def test_sliding_window_string(): """ test sliding_window with string type""" inputs = [["大", "家", "早", "上", "好"]] expect = np.array([['大', '家'], ['家', '早'], ['早', '上'], ['上', '好']]) dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False) - dataset = dataset.map(input_columns=["text"], operations=text.SlidingWindow(2, 0)) + dataset = dataset.map(operations=text.SlidingWindow(2, 0), input_columns=["text"]) result = [] for data in dataset.create_dict_iterator(num_epochs=1): @@ -36,6 +37,7 @@ def test_sliding_window_string(): result = np.array(result) np.testing.assert_array_equal(result, expect) + def test_sliding_window_number(): inputs = [1] expect = np.array([[1]]) @@ -44,21 +46,23 @@ def test_sliding_window_number(): yield (np.array(nums),) dataset = ds.GeneratorDataset(gen(inputs), column_names=["number"]) - dataset = dataset.map(input_columns=["number"], operations=text.SlidingWindow(1, -1)) + dataset = dataset.map(operations=text.SlidingWindow(1, -1), input_columns=["number"]) for data in dataset.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(data['number'], expect) + def test_sliding_window_big_width(): inputs = [[1, 2, 3, 4, 5]] expect = np.array([]) dataset = ds.NumpySlicesDataset(inputs, column_names=["number"], shuffle=False) - dataset = dataset.map(input_columns=["number"], operations=text.SlidingWindow(30, 0)) + dataset = dataset.map(operations=text.SlidingWindow(30, 0), input_columns=["number"]) for data in dataset.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(data['number'], expect) + def test_sliding_window_exception(): try: _ = text.SlidingWindow(0, 0) @@ -81,7 +85,7 @@ def test_sliding_window_exception(): try: inputs = [[1, 2, 3, 4, 5]] dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False) - dataset = dataset.map(input_columns=["text"], operations=text.SlidingWindow(3, -100)) + dataset = dataset.map(operations=text.SlidingWindow(3, -100), input_columns=["text"]) for _ in dataset.create_dict_iterator(num_epochs=1): pass assert False @@ -91,13 +95,14 @@ def test_sliding_window_exception(): try: inputs = ["aa", "bb", "cc"] dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False) - dataset = dataset.map(input_columns=["text"], operations=text.SlidingWindow(2, 0)) + dataset = dataset.map(operations=text.SlidingWindow(2, 0), input_columns=["text"]) for _ in dataset.create_dict_iterator(num_epochs=1): pass assert False except RuntimeError as e: assert "SlidingWindosOp supports 1D Tensors only for now." 
in str(e) + if __name__ == '__main__': test_sliding_window_string() test_sliding_window_number() diff --git a/tests/ut/python/dataset/test_soft_dvpp.py b/tests/ut/python/dataset/test_soft_dvpp.py index d26ea716f7..f84da207b1 100644 --- a/tests/ut/python/dataset/test_soft_dvpp.py +++ b/tests/ut/python/dataset/test_soft_dvpp.py @@ -34,12 +34,12 @@ def test_soft_dvpp_decode_resize_jpeg(plot=False): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = vision.Decode() resize_op = vision.Resize((256, 512)) - data1 = data1.map(input_columns=["image"], operations=[decode_op, resize_op]) + data1 = data1.map(operations=[decode_op, resize_op], input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) soft_dvpp_decode_resize_op = vision.SoftDvppDecodeResizeJpeg((256, 512)) - data2 = data2.map(input_columns=["image"], operations=soft_dvpp_decode_resize_op) + data2 = data2.map(operations=soft_dvpp_decode_resize_op, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -64,12 +64,12 @@ def test_soft_dvpp_decode_random_crop_resize_jpeg(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) random_crop_decode_resize_op = vision.RandomCropDecodeResize((256, 512), (1, 1), (0.5, 0.5)) - data1 = data1.map(input_columns=["image"], operations=random_crop_decode_resize_op) + data1 = data1.map(operations=random_crop_decode_resize_op, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) soft_dvpp_random_crop_decode_resize_op = vision.SoftDvppDecodeRandomCropResizeJpeg((256, 512), (1, 1), (0.5, 0.5)) - data2 = data2.map(input_columns=["image"], operations=soft_dvpp_random_crop_decode_resize_op) + data2 = data2.map(operations=soft_dvpp_random_crop_decode_resize_op, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -95,12 +95,12 @@ def test_soft_dvpp_decode_resize_jpeg_supplement(plot=False): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = vision.Decode() resize_op = vision.Resize(1134) - data1 = data1.map(input_columns=["image"], operations=[decode_op, resize_op]) + data1 = data1.map(operations=[decode_op, resize_op], input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) soft_dvpp_decode_resize_op = vision.SoftDvppDecodeResizeJpeg(1134) - data2 = data2.map(input_columns=["image"], operations=soft_dvpp_decode_resize_op) + data2 = data2.map(operations=soft_dvpp_decode_resize_op, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): diff --git a/tests/ut/python/dataset/test_sync_wait.py b/tests/ut/python/dataset/test_sync_wait.py index b64cae79ac..f57355c905 100644 --- a/tests/ut/python/dataset/test_sync_wait.py +++ b/tests/ut/python/dataset/test_sync_wait.py @@ -45,7 +45,7 @@ def test_simple_sync_wait(): aug = Augment(0) dataset = dataset.sync_wait(condition_name="policy", callback=aug.update) - dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) + dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) dataset = 
dataset.batch(batch_size) count = 0 for data in dataset.create_dict_iterator(num_epochs=1): @@ -68,7 +68,7 @@ def test_simple_shuffle_sync(): aug = Augment(0) dataset = dataset.shuffle(shuffle_size) dataset = dataset.sync_wait(condition_name="policy", callback=aug.update) - dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) + dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) dataset = dataset.batch(batch_size) count = 0 @@ -91,7 +91,7 @@ def test_two_sync(): # notice that with our design, we need to have step_size = shuffle size dataset = dataset.sync_wait(condition_name="every batch", callback=aug.update) - dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) + dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) dataset = dataset.sync_wait(num_batch=2, condition_name="every 2 batches") @@ -116,7 +116,7 @@ def test_sync_epoch(): aug = Augment(0) dataset = dataset.sync_wait(condition_name="policy", callback=aug.update) - dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) + dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) dataset = dataset.batch(batch_size, drop_remainder=True) for _ in range(3): @@ -139,14 +139,14 @@ def test_multiple_iterators(): aug = Augment(0) dataset = dataset.sync_wait(condition_name="policy", callback=aug.update) - dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) + dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) dataset = dataset.batch(batch_size, drop_remainder=True) # 2nd dataset dataset2 = ds.GeneratorDataset(gen, column_names=["input"]) aug = Augment(0) dataset2 = dataset2.sync_wait(condition_name="policy", callback=aug.update) - dataset2 = dataset2.map(input_columns=["input"], operations=[aug.preprocess]) + dataset2 = dataset2.map(operations=[aug.preprocess], input_columns=["input"]) dataset2 = dataset2.batch(batch_size, drop_remainder=True) for item1, item2 in zip(dataset.create_dict_iterator(num_epochs=1), dataset2.create_dict_iterator(num_epochs=1)): @@ -168,7 +168,7 @@ def test_sync_exception_01(): aug = Augment(0) dataset = dataset.sync_wait(condition_name="policy", callback=aug.update) - dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) + dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) with pytest.raises(RuntimeError) as e: dataset.shuffle(shuffle_size) @@ -186,7 +186,7 @@ def test_sync_exception_02(): aug = Augment(0) dataset = dataset.sync_wait(condition_name="every batch", callback=aug.update) - dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) + dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) with pytest.raises(RuntimeError) as e: dataset.sync_wait(num_batch=2, condition_name="every batch") @@ -219,7 +219,7 @@ def test_sync_exception_04(): aug = Augment(0) # try to create dataset with batch_size < 0 dataset = dataset.sync_wait(condition_name="every batch", callback=aug.update) - dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) + dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) count = 0 with pytest.raises(RuntimeError) as e: for _ in dataset.create_dict_iterator(num_epochs=1): @@ -240,7 +240,7 @@ def test_sync_exception_05(): aug = Augment(0) # try to create dataset with batch_size < 0 dataset = dataset.sync_wait(condition_name="every batch", callback=aug.update) - dataset = 
dataset.map(input_columns=["input"], operations=[aug.preprocess]) + dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) with pytest.raises(RuntimeError) as e: for _ in dataset.create_dict_iterator(num_epochs=1): dataset.disable_sync() diff --git a/tests/ut/python/dataset/test_ten_crop.py b/tests/ut/python/dataset/test_ten_crop.py index 6728868d35..3dd16734a1 100644 --- a/tests/ut/python/dataset/test_ten_crop.py +++ b/tests/ut/python/dataset/test_ten_crop.py @@ -39,7 +39,7 @@ def util_test_ten_crop(crop_size, vertical_flip=False, plot=False): vision.ToTensor(), ] transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) - data1 = data1.map(input_columns=["image"], operations=transform_1) + data1 = data1.map(operations=transform_1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -49,7 +49,7 @@ def util_test_ten_crop(crop_size, vertical_flip=False, plot=False): lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 10 images ] transform_2 = mindspore.dataset.transforms.py_transforms.Compose(transforms_2) - data2 = data2.map(input_columns=["image"], operations=transform_2) + data2 = data2.map(operations=transform_2, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): num_iter += 1 @@ -111,7 +111,7 @@ def test_ten_crop_md5(): lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 10 images ] transform_2 = mindspore.dataset.transforms.py_transforms.Compose(transforms_2) - data2 = data2.map(input_columns=["image"], operations=transform_2) + data2 = data2.map(operations=transform_2, input_columns=["image"]) # Compare with expected md5 from images filename = "ten_crop_01_result.npz" save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN) @@ -171,7 +171,7 @@ def test_ten_crop_wrong_img_error_msg(): vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) with pytest.raises(RuntimeError) as info: data.create_tuple_iterator(num_epochs=1).get_next() diff --git a/tests/ut/python/dataset/test_tensor_empty.py b/tests/ut/python/dataset/test_tensor_empty.py index f681055544..7d156305c6 100644 --- a/tests/ut/python/dataset/test_tensor_empty.py +++ b/tests/ut/python/dataset/test_tensor_empty.py @@ -44,7 +44,7 @@ def test_tensor_empty_map(): z = np.array([], dtype=np.float64) return x, y, z - data = data.map(input_columns=["col1", "col2", "col3"], operations=func) + data = data.map(operations=func, input_columns=["col1", "col2", "col3"]) for d in data: np.testing.assert_array_equal(np.array([1], dtype=np.int64), d[0]) diff --git a/tests/ut/python/dataset/test_tensor_string.py b/tests/ut/python/dataset/test_tensor_string.py index 0d821f683d..94ed6a86bc 100644 --- a/tests/ut/python/dataset/test_tensor_string.py +++ b/tests/ut/python/dataset/test_tensor_string.py @@ -94,7 +94,7 @@ def test_map(): splits = s.item().split() return np.array(splits) - data = data.map(input_columns=["col"], operations=split) + data = data.map(operations=split, input_columns=["col"]) expected = np.array(["ab", "cde", "121"], dtype='S') for d in data: np.testing.assert_array_equal(d[0], expected) @@ -110,7 +110,7 @@ def test_map2(): out = np.char.upper(b) return out - data = 
data.map(input_columns=["col"], operations=upper) + data = data.map(operations=upper, input_columns=["col"]) expected = np.array(["AB CDE 121"], dtype='S') for d in data: np.testing.assert_array_equal(d[0], expected) diff --git a/tests/ut/python/dataset/test_text_basic_tokenizer.py b/tests/ut/python/dataset/test_text_basic_tokenizer.py index d5879b70f3..5d12f2940d 100644 --- a/tests/ut/python/dataset/test_text_basic_tokenizer.py +++ b/tests/ut/python/dataset/test_text_basic_tokenizer.py @@ -105,8 +105,9 @@ def check_basic_tokenizer_with_offsets(first, last, expected_tokens, expected_of preserve_unused_token=preserve_unused_token, with_offsets=True) - dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], - column_order=['token', 'offsets_start', 'offsets_limit'], operations=basic_tokenizer) + dataset = dataset.map(operations=basic_tokenizer, input_columns=['text'], + output_columns=['token', 'offsets_start', 'offsets_limit'], + column_order=['token', 'offsets_start', 'offsets_limit']) count = 0 for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['token']) diff --git a/tests/ut/python/dataset/test_text_bert_tokenizer.py b/tests/ut/python/dataset/test_text_bert_tokenizer.py index 33fecaca0f..a2b8b9c637 100644 --- a/tests/ut/python/dataset/test_text_bert_tokenizer.py +++ b/tests/ut/python/dataset/test_text_bert_tokenizer.py @@ -212,8 +212,9 @@ def check_bert_tokenizer_with_offsets(first, last, expect_str, vocab=vocab, suffix_indicator=suffix_indicator, max_bytes_per_token=max_bytes_per_token, unknown_token=unknown_token, lower_case=lower_case, keep_whitespace=keep_whitespace, normalization_form=normalization_form, preserve_unused_token=preserve_unused_token, with_offsets=True) - dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], - column_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op) + dataset = dataset.map(operations=tokenizer_op, input_columns=['text'], + output_columns=['token', 'offsets_start', 'offsets_limit'], + column_order=['token', 'offsets_start', 'offsets_limit']) count = 0 for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['token']) diff --git a/tests/ut/python/dataset/test_text_jieba_tokenizer.py b/tests/ut/python/dataset/test_text_jieba_tokenizer.py index c334bbbbed..5199151826 100644 --- a/tests/ut/python/dataset/test_text_jieba_tokenizer.py +++ b/tests/ut/python/dataset/test_text_jieba_tokenizer.py @@ -28,8 +28,8 @@ def test_jieba_1(): """Test jieba tokenizer with MP mode""" data = ds.TextFileDataset(DATA_FILE) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) + data = data.map(operations=jieba_op, input_columns=["text"], + num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] ret = [] for i in data.create_dict_iterator(num_epochs=1): @@ -42,8 +42,8 @@ def test_jieba_1_1(): """Test jieba tokenizer with HMM mode""" data = ds.TextFileDataset(DATA_FILE) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) + data = data.map(operations=jieba_op, input_columns=["text"], + num_parallel_workers=1) expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) @@ -55,8 +55,8 @@ def test_jieba_1_2(): 
"""Test jieba tokenizer with HMM MIX""" data = ds.TextFileDataset(DATA_FILE) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MIX) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) + data = data.map(operations=jieba_op, input_columns=["text"], + num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) @@ -71,8 +71,8 @@ def test_jieba_2(): jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) jieba_op.add_word("男默女泪") expect = ['男默女泪', '市', '长江大桥'] - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=2) + data = data.map(operations=jieba_op, input_columns=["text"], + num_parallel_workers=2) for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): @@ -85,8 +85,8 @@ def test_jieba_2_1(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) jieba_op.add_word("男默女泪", 10) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=2) + data = data.map(operations=jieba_op, input_columns=["text"], + num_parallel_workers=2) expect = ['男默女泪', '市', '长江大桥'] for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) @@ -109,8 +109,8 @@ def test_jieba_2_3(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) jieba_op.add_word("江大桥", 20000) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=2) + data = data.map(operations=jieba_op, input_columns=["text"], + num_parallel_workers=2) expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) @@ -127,8 +127,8 @@ def test_jieba_3(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) jieba_op.add_dict(user_dict) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) + data = data.map(operations=jieba_op, input_columns=["text"], + num_parallel_workers=1) expect = ['男默女泪', '市', '长江大桥'] for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) @@ -146,8 +146,8 @@ def test_jieba_3_1(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) jieba_op.add_dict(user_dict) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) + data = data.map(operations=jieba_op, input_columns=["text"], + num_parallel_workers=1) expect = ['男默女泪', '市长', '江大桥'] for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) @@ -162,8 +162,8 @@ def test_jieba_4(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) jieba_op.add_dict(DICT_FILE) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) + data = data.map(operations=jieba_op, input_columns=["text"], + num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) @@ -188,8 +188,8 @@ def test_jieba_5(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) jieba_op.add_word("江大桥", 20000) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) + data = data.map(operations=jieba_op, input_columns=["text"], 
+ num_parallel_workers=1) expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) @@ -201,9 +201,10 @@ def test_jieba_with_offsets_1(): """Test jieba tokenizer with MP mode""" data = ds.TextFileDataset(DATA_FILE) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) - data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + data = data.map(operations=jieba_op, input_columns=["text"], + output_columns=["token", "offsets_start", "offsets_limit"], column_order=["token", "offsets_start", "offsets_limit"], - operations=jieba_op, num_parallel_workers=1) + num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] @@ -222,9 +223,10 @@ def test_jieba_with_offsets_1_1(): """Test jieba tokenizer with HMM mode""" data = ds.TextFileDataset(DATA_FILE) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM, with_offsets=True) - data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + data = data.map(operations=jieba_op, input_columns=["text"], + output_columns=["token", "offsets_start", "offsets_limit"], column_order=["token", "offsets_start", "offsets_limit"], - operations=jieba_op, num_parallel_workers=1) + num_parallel_workers=1) expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] expected_offsets_start = [0, 6, 12, 15, 18, 21, 27, 33, 36, 42, 45] expected_offsets_limit = [6, 12, 15, 18, 21, 27, 33, 36, 42, 45, 48] @@ -242,9 +244,10 @@ def test_jieba_with_offsets_1_2(): """Test jieba tokenizer with HMM MIX""" data = ds.TextFileDataset(DATA_FILE) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MIX, with_offsets=True) - data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + data = data.map(operations=jieba_op, input_columns=["text"], + output_columns=["token", "offsets_start", "offsets_limit"], column_order=["token", "offsets_start", "offsets_limit"], - operations=jieba_op, num_parallel_workers=1) + num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] @@ -265,9 +268,10 @@ def test_jieba_with_offsets_2(): jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) jieba_op.add_word("男默女泪") expect = ['男默女泪', '市', '长江大桥'] - data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + data = data.map(operations=jieba_op, input_columns=["text"], + output_columns=["token", "offsets_start", "offsets_limit"], column_order=["token", "offsets_start", "offsets_limit"], - operations=jieba_op, num_parallel_workers=2) + num_parallel_workers=2) expected_offsets_start = [0, 12, 15] expected_offsets_limit = [12, 15, 27] for i in data.create_dict_iterator(num_epochs=1): @@ -286,9 +290,10 @@ def test_jieba_with_offsets_2_1(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) jieba_op.add_word("男默女泪", 10) - data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + data = data.map(operations=jieba_op, input_columns=["text"], + output_columns=["token", "offsets_start", "offsets_limit"], column_order=["token", 
"offsets_start", "offsets_limit"], - operations=jieba_op, num_parallel_workers=2) + num_parallel_workers=2) expect = ['男默女泪', '市', '长江大桥'] expected_offsets_start = [0, 12, 15] expected_offsets_limit = [12, 15, 27] @@ -308,9 +313,10 @@ def test_jieba_with_offsets_2_2(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) jieba_op.add_word("江大桥", 20000) - data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + data = data.map(operations=jieba_op, input_columns=["text"], + output_columns=["token", "offsets_start", "offsets_limit"], column_order=["token", "offsets_start", "offsets_limit"], - operations=jieba_op, num_parallel_workers=2) + num_parallel_workers=2) expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51] expected_offsets_limit = [6, 12, 21, 27, 30, 42, 45, 51, 57] @@ -333,9 +339,10 @@ def test_jieba_with_offsets_3(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) jieba_op.add_dict(user_dict) - data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + data = data.map(operations=jieba_op, input_columns=["text"], + output_columns=["token", "offsets_start", "offsets_limit"], column_order=["token", "offsets_start", "offsets_limit"], - operations=jieba_op, num_parallel_workers=1) + num_parallel_workers=1) expect = ['男默女泪', '市', '长江大桥'] expected_offsets_start = [0, 12, 15] expected_offsets_limit = [12, 15, 27] @@ -359,9 +366,10 @@ def test_jieba_with_offsets_3_1(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) jieba_op.add_dict(user_dict) - data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + data = data.map(operations=jieba_op, input_columns=["text"], + output_columns=["token", "offsets_start", "offsets_limit"], column_order=["token", "offsets_start", "offsets_limit"], - operations=jieba_op, num_parallel_workers=1) + num_parallel_workers=1) expect = ['男默女泪', '市长', '江大桥'] expected_offsets_start = [0, 12, 18] expected_offsets_limit = [12, 18, 27] @@ -382,9 +390,10 @@ def test_jieba_with_offsets_4(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) jieba_op.add_dict(DICT_FILE) - data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + data = data.map(operations=jieba_op, input_columns=["text"], + output_columns=["token", "offsets_start", "offsets_limit"], column_order=["token", "offsets_start", "offsets_limit"], - operations=jieba_op, num_parallel_workers=1) + num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] @@ -405,9 +414,10 @@ def test_jieba_with_offsets_5(): data = ds.TextFileDataset(DATA_FILE4) jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) jieba_op.add_word("江大桥", 20000) - data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + data = data.map(operations=jieba_op, input_columns=["text"], + output_columns=["token", "offsets_start", "offsets_limit"], column_order=["token", "offsets_start", "offsets_limit"], - operations=jieba_op, 
num_parallel_workers=1) + num_parallel_workers=1) expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51] expected_offsets_limit = [6, 12, 21, 27, 30, 42, 45, 51, 57] @@ -420,6 +430,7 @@ def test_jieba_with_offsets_5(): for index, item in enumerate(i["offsets_limit"]): assert item == expected_offsets_limit[index] + def gen(): text = np.array("今天天气太好了我们一起去外面玩吧".encode("UTF8"), dtype='S') yield (text,) @@ -436,8 +447,8 @@ def pytoken_op(input_data): def test_jieba_6(): data = ds.GeneratorDataset(gen, column_names=["text"]) - data = data.map(input_columns=["text"], - operations=pytoken_op, num_parallel_workers=1) + data = data.map(operations=pytoken_op, input_columns=["text"], + num_parallel_workers=1) expect = ['今天天气太', '好了我们一', '起去外面玩吧'] for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) diff --git a/tests/ut/python/dataset/test_text_tokenizer.py b/tests/ut/python/dataset/test_text_tokenizer.py index 972a551423..a22ee4f2f2 100644 --- a/tests/ut/python/dataset/test_text_tokenizer.py +++ b/tests/ut/python/dataset/test_text_tokenizer.py @@ -59,8 +59,9 @@ def test_unicode_char_tokenizer_with_offsets(): input_strs = ("Welcome to Beijing!", "北京欢迎您!", "我喜欢English!", " ") dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) tokenizer = text.UnicodeCharTokenizer(with_offsets=True) - dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], - column_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer) + dataset = dataset.map(operations=tokenizer, input_columns=['text'], + output_columns=['token', 'offsets_start', 'offsets_limit'], + column_order=['token', 'offsets_start', 'offsets_limit']) tokens = [] expected_offsets_start = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], [0, 3, 6, 9, 12, 15], [0, 3, 6, 9, 10, 11, 12, 13, 14, 15, 16], [0, 1]] @@ -106,8 +107,9 @@ def test_whitespace_tokenizer_with_offsets(): [""]] dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) tokenizer = text.WhitespaceTokenizer(with_offsets=True) - dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], - column_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer) + dataset = dataset.map(operations=tokenizer, input_columns=['text'], + output_columns=['token', 'offsets_start', 'offsets_limit'], + column_order=['token', 'offsets_start', 'offsets_limit']) tokens = [] expected_offsets_start = [[0, 8, 11], [0], [0], [0]] expected_offsets_limit = [[7, 10, 19], [18], [17], [0]] @@ -172,8 +174,9 @@ def test_unicode_script_tokenizer_with_offsets(): [""]] dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=False, with_offsets=True) - dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], - column_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer) + dataset = dataset.map(operations=tokenizer, input_columns=['text'], + output_columns=['token', 'offsets_start', 'offsets_limit'], + column_order=['token', 'offsets_start', 'offsets_limit']) tokens = [] expected_offsets_start = [[0, 8, 11, 18], [0, 15], [0, 9, 16], [0]] expected_offsets_limit = [[7, 10, 18, 19], [15, 18], [9, 16, 17], [0]] @@ -198,8 +201,9 @@ def test_unicode_script_tokenizer_with_offsets2(): [" "]] dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) tokenizer = 
text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True) - dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], - column_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer) + dataset = dataset.map(operations=tokenizer, input_columns=['text'], + output_columns=['token', 'offsets_start', 'offsets_limit'], + column_order=['token', 'offsets_start', 'offsets_limit']) tokens = [] expected_offsets_start = [[0, 7, 8, 10, 11, 18], [0, 15], [0, 9, 16], [0]] expected_offsets_limit = [[7, 8, 10, 11, 18, 19], [15, 18], [9, 16, 17], [2]] @@ -339,8 +343,9 @@ def test_regex_tokenizer_with_offsets(): if last >= first: dataset = dataset.take(last - first + 1) tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True) - dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], - column_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op) + dataset = dataset.map(operations=tokenizer_op, input_columns=['text'], + output_columns=['token', 'offsets_start', 'offsets_limit'], + column_order=['token', 'offsets_start', 'offsets_limit']) out_text = [] count = 0 for i in dataset.create_dict_iterator(num_epochs=1): diff --git a/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py b/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py index 64b290c829..b879191170 100644 --- a/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py +++ b/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py @@ -126,8 +126,9 @@ def check_wordpiece_tokenizer_with_offsets(first, last, expect_str, expected_off vocab = text.Vocab.from_list(vocab_list) tokenizer_op = text.WordpieceTokenizer(vocab=vocab, with_offsets=True, unknown_token=unknown_token, max_bytes_per_token=max_bytes_per_token) - dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], - column_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op) + dataset = dataset.map(operations=tokenizer_op, input_columns=['text'], + output_columns=['token', 'offsets_start', 'offsets_limit'], + column_order=['token', 'offsets_start', 'offsets_limit']) count = 0 for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['token']) diff --git a/tests/ut/python/dataset/test_to_number_op.py b/tests/ut/python/dataset/test_to_number_op.py index 60e409b546..8e65c288e3 100644 --- a/tests/ut/python/dataset/test_to_number_op.py +++ b/tests/ut/python/dataset/test_to_number_op.py @@ -27,6 +27,7 @@ ms_integral_types = [mstype.int8, mstype.int16, mstype.int32, mstype.int64, msty np_non_integral_types = [np.float16, np.float32, np.float64] ms_non_integral_types = [mstype.float16, mstype.float32, mstype.float64] + def string_dataset_generator(strings): for string in strings: yield (np.array(string, dtype='S'),) @@ -38,7 +39,7 @@ def test_to_number_typical_case_integral(): for ms_type, inputs in zip(ms_integral_types, input_strings): dataset = ds.GeneratorDataset(string_dataset_generator(inputs), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_type)) + dataset = dataset.map(operations=text.ToNumber(ms_type), input_columns=["strings"]) expected_output = [int(string) for string in inputs] output = [] @@ -54,7 +55,7 @@ def test_to_number_typical_case_non_integral(): for ms_type, inputs in zip(ms_non_integral_types, input_strings): dataset = 
ds.GeneratorDataset(string_dataset_generator(inputs), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_type)) + dataset = dataset.map(operations=text.ToNumber(ms_type), input_columns=["strings"]) expected_output = [float(string) for string in inputs] output = [] @@ -83,12 +84,12 @@ def test_to_number_out_of_bounds_integral(): type_info = np.iinfo(np_type) input_strings = [str(type_info.max + 10)] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_type)) + dataset = dataset.map(operations=text.ToNumber(ms_type), input_columns=["strings"]) out_of_bounds_error_message_check(dataset, np_type, input_strings[0]) input_strings = [str(type_info.min - 10)] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_type)) + dataset = dataset.map(operations=text.ToNumber(ms_type), input_columns=["strings"]) out_of_bounds_error_message_check(dataset, np_type, input_strings[0]) @@ -97,7 +98,7 @@ def test_to_number_out_of_bounds_non_integral(): input_strings = [above_range[0]] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[0])) + dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[0]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: for _ in dataset.create_dict_iterator(num_epochs=1): @@ -106,7 +107,7 @@ def test_to_number_out_of_bounds_non_integral(): input_strings = [above_range[1]] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[1])) + dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[1]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: for _ in dataset.create_dict_iterator(num_epochs=1): @@ -115,7 +116,7 @@ def test_to_number_out_of_bounds_non_integral(): input_strings = [above_range[2]] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[2])) + dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[2]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: for _ in dataset.create_dict_iterator(num_epochs=1): @@ -126,7 +127,7 @@ def test_to_number_out_of_bounds_non_integral(): input_strings = [below_range[0]] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[0])) + dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[0]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: for _ in dataset.create_dict_iterator(num_epochs=1): @@ -135,7 +136,7 @@ def test_to_number_out_of_bounds_non_integral(): input_strings = [below_range[1]] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[1])) + dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[1]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: for _ in dataset.create_dict_iterator(num_epochs=1): 
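The out-of-bounds hunks above and below all migrate the same call shape. As a minimal standalone sketch of that shape (the generator and the int8 boundary value are illustrative, adapted from these tests rather than taken from the patch):

import numpy as np
import mindspore.common.dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.text as text

def string_dataset_generator(strings):
    # Yield each string as a one-element tuple holding a numpy bytes scalar.
    for string in strings:
        yield (np.array(string, dtype='S'),)

# "127" is the int8 maximum and parses cleanly; a value past the boundary
# should instead raise RuntimeError during iteration, which is what the
# surrounding out-of-bounds tests assert.
dataset = ds.GeneratorDataset(string_dataset_generator(["127"]), "strings")
dataset = dataset.map(operations=text.ToNumber(mstype.int8), input_columns=["strings"])

for data in dataset.create_dict_iterator(num_epochs=1):
    assert data["strings"] == 127
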
@@ -144,7 +145,7 @@ def test_to_number_out_of_bounds_non_integral(): input_strings = [below_range[2]] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[2])) + dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[2]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: for _ in dataset.create_dict_iterator(num_epochs=1): @@ -157,19 +158,19 @@ def test_to_number_boundaries_integral(): type_info = np.iinfo(np_type) input_strings = [str(type_info.max)] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_type)) + dataset = dataset.map(operations=text.ToNumber(ms_type), input_columns=["strings"]) for data in dataset.create_dict_iterator(num_epochs=1): assert data["strings"] == int(input_strings[0]) input_strings = [str(type_info.min)] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_type)) + dataset = dataset.map(operations=text.ToNumber(ms_type), input_columns=["strings"]) for data in dataset.create_dict_iterator(num_epochs=1): assert data["strings"] == int(input_strings[0]) input_strings = [str(0)] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_type)) + dataset = dataset.map(operations=text.ToNumber(ms_type), input_columns=["strings"]) for data in dataset.create_dict_iterator(num_epochs=1): assert data["strings"] == int(input_strings[0]) @@ -177,7 +178,7 @@ def test_to_number_boundaries_integral(): def test_to_number_invalid_input(): input_strings = ["a8fa9ds8fa"] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") - dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(mstype.int32)) + dataset = dataset.map(operations=text.ToNumber(mstype.int32), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: for _ in dataset.create_dict_iterator(num_epochs=1): diff --git a/tests/ut/python/dataset/test_to_pil.py b/tests/ut/python/dataset/test_to_pil.py index 6060fda8ef..afed565222 100644 --- a/tests/ut/python/dataset/test_to_pil.py +++ b/tests/ut/python/dataset/test_to_pil.py @@ -45,7 +45,7 @@ def test_to_pil_01(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data1 = data1.map(input_columns=["image"], operations=transform) + data1 = data1.map(operations=transform, input_columns=["image"]) # Compare with expected md5 from images filename = "to_pil_01_result.npz" @@ -68,8 +68,8 @@ def test_to_pil_02(): py_vision.ToTensor() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=transform) + data1 = data1.map(operations=decode_op, input_columns=["image"]) + data1 = data1.map(operations=transform, input_columns=["image"]) # Compare with expected md5 from images filename = "to_pil_02_result.npz" diff --git a/tests/ut/python/dataset/test_to_type.py b/tests/ut/python/dataset/test_to_type.py index 9ed668271a..f1fb046fff 100644 --- a/tests/ut/python/dataset/test_to_type.py +++ b/tests/ut/python/dataset/test_to_type.py @@ -43,7 +43,7 @@ def test_to_type_op(): 
py_vision.ToType(np.int16) ] transform1 = mindspore.dataset.transforms.py_transforms.Compose(transforms1) - data1 = data1.map(input_columns=["image"], operations=transform1) + data1 = data1.map(operations=transform1, input_columns=["image"]) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -52,7 +52,7 @@ def test_to_type_op(): py_vision.ToTensor() ] transform2 = mindspore.dataset.transforms.py_transforms.Compose(transforms2) - data2 = data2.map(input_columns=["image"], operations=transform2) + data2 = data2.map(operations=transform2, input_columns=["image"]) for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = item1["image"] @@ -64,6 +64,7 @@ def test_to_type_op(): assert image2.dtype == np.float32 assert image1.shape == image2.shape + def test_to_type_01(): """ Test ToType Op with md5 comparison: valid input (Numpy dtype) @@ -80,12 +81,13 @@ def test_to_type_01(): py_vision.ToType(np.int32) ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) # Compare with expected md5 from images filename = "to_type_01_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) + def test_to_type_02(): """ Test ToType Op with md5 comparison: valid input (str) @@ -101,12 +103,13 @@ def test_to_type_02(): py_vision.ToType('int') ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) # Compare with expected md5 from images filename = "to_type_02_result.npz" save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) + def test_to_type_03(): """ Test ToType Op: invalid input image type @@ -123,11 +126,12 @@ def test_to_type_03(): py_vision.ToType(np.int32) ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Numpy" in str(e) + def test_to_type_04(): """ Test ToType Op: no output_type given @@ -145,11 +149,12 @@ def test_to_type_04(): py_vision.ToType() ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "missing" in str(e) + def test_to_type_05(): """ Test ToType Op: invalid output_type @@ -167,11 +172,12 @@ def test_to_type_05(): py_vision.ToType('invalid') ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) - data = data.map(input_columns=["image"], operations=transform) + data = data.map(operations=transform, input_columns=["image"]) except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "data type" in str(e) + if __name__ == "__main__": test_to_type_op() test_to_type_01() diff --git a/tests/ut/python/dataset/test_type_cast.py b/tests/ut/python/dataset/test_type_cast.py index 0547e4810d..359b6d08b2 100644 --- a/tests/ut/python/dataset/test_type_cast.py +++ b/tests/ut/python/dataset/test_type_cast.py @@ -45,7 +45,7 @@ def 
test_type_cast(): type_cast_op, ] - data1 = data1.map(input_columns=["image"], operations=ctrans) + data1 = data1.map(operations=ctrans, input_columns=["image"]) # Second dataset transforms = [py_vision.Decode(), @@ -53,7 +53,7 @@ def test_type_cast(): ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): @@ -85,7 +85,7 @@ def test_type_cast_string(): type_cast_op ] - data1 = data1.map(input_columns=["image"], operations=ctrans) + data1 = data1.map(operations=ctrans, input_columns=["image"]) # Second dataset transforms = [py_vision.Decode(), @@ -93,7 +93,7 @@ def test_type_cast_string(): ] transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - data2 = data2.map(input_columns=["image"], operations=transform) + data2 = data2.map(operations=transform, input_columns=["image"]) num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): diff --git a/tests/ut/python/dataset/test_uniform_augment.py b/tests/ut/python/dataset/test_uniform_augment.py index 49464d924b..cf07d28ff0 100644 --- a/tests/ut/python/dataset/test_uniform_augment.py +++ b/tests/ut/python/dataset/test_uniform_augment.py @@ -41,8 +41,7 @@ def test_uniform_augment(plot=False, num_ops=2): F.Resize((224, 224)), F.ToTensor()]) - ds_original = ds.map(input_columns="image", - operations=transforms_original) + ds_original = ds.map(operations=transforms_original, input_columns="image") ds_original = ds_original.batch(512) @@ -71,8 +70,7 @@ def test_uniform_augment(plot=False, num_ops=2): num_ops=num_ops), F.ToTensor()]) - ds_ua = ds.map(input_columns="image", - operations=transforms_ua) + ds_ua = ds.map(operations=transforms_ua, input_columns="image") ds_ua = ds_ua.batch(512) @@ -106,8 +104,7 @@ def test_cpp_uniform_augment(plot=False, num_ops=2): transforms_original = [C.Decode(), C.Resize(size=[224, 224]), F.ToTensor()] - ds_original = ds.map(input_columns="image", - operations=transforms_original) + ds_original = ds.map(operations=transforms_original, input_columns="image") ds_original = ds_original.batch(512) @@ -133,8 +130,7 @@ def test_cpp_uniform_augment(plot=False, num_ops=2): uni_aug, F.ToTensor()] - ds_ua = ds.map(input_columns="image", - operations=transforms_all, num_parallel_workers=1) + ds_ua = ds.map(operations=transforms_all, input_columns="image", num_parallel_workers=1) ds_ua = ds_ua.batch(512) @@ -252,7 +248,7 @@ def test_cpp_uniform_augment_random_crop_badinput(num_ops=1): C.RandomHorizontalFlip() ] uni_aug = C.UniformAugment(transforms=transforms_ua, num_ops=num_ops) - ds1 = ds1.map(input_columns="image", operations=uni_aug) + ds1 = ds1.map(operations=uni_aug, input_columns="image") # apply DatasetOps ds1 = ds1.batch(batch_size, drop_remainder=True, num_parallel_workers=1) diff --git a/tests/ut/python/dataset/test_vocab.py b/tests/ut/python/dataset/test_vocab.py index 5ced80d7fe..afcfa225a6 100644 --- a/tests/ut/python/dataset/test_vocab.py +++ b/tests/ut/python/dataset/test_vocab.py @@ -29,7 +29,7 @@ def test_from_list_tutorial(): vocab = text.Vocab.from_list("home IS behind the world 
ahead !".split(" "), ["", ""], True) lookup = text.Lookup(vocab, "") data = ds.TextFileDataset(DATA_FILE, shuffle=False) - data = data.map(input_columns=["text"], operations=lookup) + data = data.map(operations=lookup, input_columns=["text"]) ind = 0 res = [2, 1, 4, 5, 6, 7] for d in data.create_dict_iterator(num_epochs=1): @@ -41,7 +41,7 @@ def test_from_file_tutorial(): vocab = text.Vocab.from_file(VOCAB_FILE, ",", None, ["", ""], True) lookup = text.Lookup(vocab) data = ds.TextFileDataset(DATA_FILE, shuffle=False) - data = data.map(input_columns=["text"], operations=lookup) + data = data.map(operations=lookup, input_columns=["text"]) ind = 0 res = [10, 11, 12, 15, 13, 14] for d in data.create_dict_iterator(num_epochs=1): @@ -53,7 +53,7 @@ def test_from_dict_tutorial(): vocab = text.Vocab.from_dict({"home": 3, "behind": 2, "the": 4, "world": 5, "": 6}) lookup = text.Lookup(vocab, "") # any unknown token will be mapped to the id of data = ds.TextFileDataset(DATA_FILE, shuffle=False) - data = data.map(input_columns=["text"], operations=lookup) + data = data.map(operations=lookup, input_columns=["text"]) res = [3, 6, 2, 4, 5, 6] ind = 0 for d in data.create_dict_iterator(num_epochs=1): @@ -79,7 +79,7 @@ def test_from_list(): try: vocab = text.Vocab.from_list(vocab_input, special_tokens, special_first) data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"]) - data = data.map(input_columns=["text"], operations=text.Lookup(vocab, unknown_token)) + data = data.map(operations=text.Lookup(vocab, unknown_token), input_columns=["text"]) res = [] for d in data.create_dict_iterator(num_epochs=1): res.append(d["text"].item()) @@ -118,7 +118,7 @@ def test_from_file(): vocab = text.Vocab.from_file(SIMPLE_VOCAB_FILE, vocab_size=vocab_size, special_tokens=special_tokens, special_first=special_first) data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"]) - data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "s2")) + data = data.map(operations=text.Lookup(vocab, "s2"), input_columns=["text"]) res = [] for d in data.create_dict_iterator(num_epochs=1): res.append(d["text"].item()) @@ -150,7 +150,7 @@ def test_lookup_cast_type(): data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"]) # if data_type is None, test the default value of data_type op = text.Lookup(vocab, "") if data_type is None else text.Lookup(vocab, "", data_type) - data = data.map(input_columns=["text"], operations=op) + data = data.map(operations=op, input_columns=["text"]) res = [] for d in data.create_dict_iterator(num_epochs=1): res.append(d["text"]) diff --git a/tests/ut/python/dataset/util.py b/tests/ut/python/dataset/util.py index 64f3fb129c..ea061d2b9c 100644 --- a/tests/ut/python/dataset/util.py +++ b/tests/ut/python/dataset/util.py @@ -284,7 +284,7 @@ def visualize_with_bounding_boxes(orig, aug, annot_name="bbox", plot_rows=3): def add_bounding_boxes(ax, bboxes): for bbox in bboxes: rect = patches.Rectangle((bbox[0], bbox[1]), - bbox[2]*0.997, bbox[3]*0.997, + bbox[2] * 0.997, bbox[3] * 0.997, linewidth=1.80, edgecolor='r', facecolor='none') # Add the patch to the Axes # Params to Rectangle slightly modified to prevent drawing overflow @@ -324,14 +324,14 @@ def visualize_with_bounding_boxes(orig, aug, annot_name="bbox", plot_rows=3): axA.imshow(dataA["image"]) add_bounding_boxes(axA, dataA[annot_name]) - axA.title.set_text("Original" + str(cur_ix+1)) + axA.title.set_text("Original" + str(cur_ix + 1)) axB.imshow(dataB["image"]) add_bounding_boxes(axB, dataB[annot_name]) - 
axB.title.set_text("Augmented" + str(cur_ix+1)) + axB.title.set_text("Augmented" + str(cur_ix + 1)) - logger.info("Original **\n{} : {}".format(str(cur_ix+1), dataA[annot_name])) - logger.info("Augmented **\n{} : {}\n".format(str(cur_ix+1), dataB[annot_name])) + logger.info("Original **\n{} : {}".format(str(cur_ix + 1), dataA[annot_name])) + logger.info("Augmented **\n{} : {}\n".format(str(cur_ix + 1), dataB[annot_name])) plt.show() @@ -384,22 +384,22 @@ def check_bad_bbox(data, test_op, invalid_bbox_type, expected_error): try: # map to use selected invalid bounding box type - data = data.map(input_columns=["image", "bbox"], + data = data.map(operations=lambda img, bboxes: add_bad_bbox(img, bboxes, invalid_bbox_type), + input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=lambda img, bboxes: add_bad_bbox(img, bboxes, invalid_bbox_type)) + column_order=["image", "bbox"]) # map to apply ops - data = data.map(input_columns=["image", "bbox"], + data = data.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], - column_order=["image", "bbox"], - operations=[test_op]) # Add column for "bbox" + column_order=["image", "bbox"]) # Add column for "bbox" for _, _ in enumerate(data.create_dict_iterator(num_epochs=1)): break except RuntimeError as error: logger.info("Got an exception in DE: {}".format(str(error))) assert expected_error in str(error) -#return true if datasets are equal + +# return true if datasets are equal def dataset_equal(data1, data2, mse_threshold): if data1.get_dataset_size() != data2.get_dataset_size(): return False @@ -414,6 +414,7 @@ def dataset_equal(data1, data2, mse_threshold): break return equal + # return true if datasets are equal after modification to target # params: data_unchanged - dataset kept unchanged # data_target - dataset to be modified by foo