change map calls

pull/5968/head
nhussain 4 years ago
parent 77e05e32a4
commit 92e99ff224
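
Every hunk below makes the same mechanical change: operations moves to the front of each dataset map(...) call, ahead of input_columns, matching the updated parameter order of MindSpore's Dataset.map. A minimal before/after sketch of the pattern (the dataset path and the op are illustrative, not taken from this diff):

import mindspore.common.dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C

type_cast_op = C.TypeCast(mstype.int32)
data = ds.Cifar10Dataset("./cifar-10-batches-bin")  # hypothetical local path

# before: input_columns led the call
# data = data.map(input_columns="label", operations=type_cast_op)

# after: operations comes first, matching the new signature
data = data.map(operations=type_cast_op, input_columns="label")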

@@ -282,8 +282,9 @@ class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp):
>>> # If with_offsets=True, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeCharTokenizer(True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_with_offsets
@@ -313,8 +314,9 @@ class WordpieceTokenizer(cde.WordpieceTokenizerOp):
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]',
>>> max_bytes_per_token=100, with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op,
>>> input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_wordpiece_tokenizer
@@ -378,8 +380,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WhitespaceTokenizer(True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_with_offsets
@@ -404,8 +407,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_unicode_script_tokenizer
@@ -497,8 +501,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_regex_tokenizer
@@ -540,8 +545,9 @@ if platform.system().lower() != 'windows':
>>> normalization_form=NormalizeForm.NONE,
>>> preserve_unused_token=True,
>>> with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_basic_tokenizer
@@ -593,8 +599,9 @@ if platform.system().lower() != 'windows':
>>> unknown_token='[UNK]', lower_case=False, keep_whitespace=False,
>>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
>>> with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_bert_tokenizer
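
All of the tokenizer docstrings above follow the same with_offsets pattern; a self-contained sketch of the rewritten call, assuming a one-column text file (corpus.txt is a hypothetical path, and WhitespaceTokenizer is platform-gated by the Windows check above):

import mindspore.dataset as ds
import mindspore.dataset.text as text

data = ds.TextFileDataset("corpus.txt")  # yields a single "text" column
tokenizer_op = text.WhitespaceTokenizer(with_offsets=True)
# one "text" column in, three token/offset columns out
data = data.map(operations=tokenizer_op, input_columns=["text"],
                output_columns=["token", "offsets_start", "offsets_limit"],
                column_order=["token", "offsets_start", "offsets_limit"])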

@@ -39,14 +39,14 @@ def create_dataset_cifar10(data_path, batch_size=32, repeat_size=1, status="trai
random_horizontal_op = CV.RandomHorizontalFlip()
channel_swap_op = CV.HWC2CHW()
typecast_op = C.TypeCast(mstype.int32)
cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op)
cifar_ds = cifar_ds.map(operations=typecast_op, input_columns="label")
if status == "train":
cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op)
cifar_ds = cifar_ds.map(operations=random_crop_op, input_columns="image")
cifar_ds = cifar_ds.map(operations=random_horizontal_op, input_columns="image")
cifar_ds = cifar_ds.map(operations=resize_op, input_columns="image")
cifar_ds = cifar_ds.map(operations=rescale_op, input_columns="image")
cifar_ds = cifar_ds.map(operations=normalize_op, input_columns="image")
cifar_ds = cifar_ds.map(operations=channel_swap_op, input_columns="image")
cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
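
Because operations accepts a list that is applied in order, the six single-op map calls above could equally be fused into one; the diff keeps them separate, which is equivalent. A fused sketch with illustrative transform parameters:

import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as CV

cifar_ds = ds.Cifar10Dataset("./cifar-10-batches-bin")  # hypothetical path
ops = [CV.RandomCrop((32, 32), (4, 4, 4, 4)),  # crop with 4-pixel padding
       CV.RandomHorizontalFlip(),
       CV.Resize((227, 227)),
       CV.Rescale(1.0 / 255.0, 0.0),
       CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
       CV.HWC2CHW()]
cifar_ds = cifar_ds.map(operations=ops, input_columns="image")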

@@ -84,8 +84,9 @@ class SegDataset:
shuffle=True, num_parallel_workers=self.num_readers,
num_shards=self.shard_num, shard_id=self.shard_id)
transforms_list = self.preprocess_
data_set = data_set.map(input_columns=["data", "label"], output_columns=["data", "label"],
operations=transforms_list, num_parallel_workers=self.num_parallel_calls)
data_set = data_set.map(operations=transforms_list, input_columns=["data", "label"],
output_columns=["data", "label"],
num_parallel_workers=self.num_parallel_calls)
data_set = data_set.shuffle(buffer_size=self.batch_size * 10)
data_set = data_set.batch(self.batch_size, drop_remainder=True)
data_set = data_set.repeat(repeat)

@@ -74,8 +74,10 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
ious = ious.T
return ious
class PhotoMetricDistortion:
"""Photo Metric Distortion"""
def __init__(self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
@@ -134,8 +136,10 @@ class PhotoMetricDistortion:
return img, boxes, labels
class Expand:
"""expand image"""
def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
if to_rgb:
self.mean = mean[::-1]
@@ -158,6 +162,7 @@ class Expand:
boxes += np.tile((left, top), 2)
return img, boxes, labels
def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""rescale operation for image"""
img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
@@ -173,6 +178,7 @@ def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""resize operation for image"""
img_data = img
@@ -190,6 +196,7 @@ def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
"""resize operation for image of eval"""
img_data = img
@@ -207,18 +214,21 @@ def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""impad operation for image"""
img_data = mmcv.impad(img, (config.img_height, config.img_width))
img_data = img_data.astype(np.float32)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""imnormalize operation for image"""
img_data = mmcv.imnormalize(img, [123.675, 116.28, 103.53], [58.395, 57.12, 57.375], True)
img_data = img_data.astype(np.float32)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""flip operation for image"""
img_data = img
@@ -231,6 +241,7 @@ def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img_data, img_shape, flipped, gt_label, gt_num)
def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num):
"""flipped generation"""
img_data = img
@@ -242,10 +253,12 @@ def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img_data, img_shape, flipped, gt_label, gt_num)
def image_bgr_rgb(img, img_shape, gt_bboxes, gt_label, gt_num):
img_data = img[:, :, ::-1]
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""transpose operation for image"""
img_data = img.transpose(2, 0, 1).copy()
@@ -257,6 +270,7 @@ def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""photo crop operation for image"""
random_photo = PhotoMetricDistortion()
@@ -264,6 +278,7 @@ def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""expand operation for image"""
expand = Expand()
@@ -271,8 +286,10 @@ def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img, img_shape, gt_bboxes, gt_label, gt_num)
def preprocess_fn(image, box, is_training):
"""Preprocess function for dataset."""
def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert):
image_shape = image_shape[:2]
input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert
@@ -325,6 +342,7 @@ def preprocess_fn(image, box, is_training):
return _data_aug(image, box, is_training)
def create_coco_label(is_training):
"""Get image path and annotation from COCO."""
from pycocotools.coco import COCO
@@ -375,6 +393,7 @@ def create_coco_label(is_training):
return image_files, image_anno_dict
def anno_parser(annos_str):
"""Parse annotation from string to list."""
annos = []
@@ -383,6 +402,7 @@ def anno_parser(annos_str):
annos.append(anno)
return annos
def filter_valid_data(image_dir, anno_path):
"""Filter valid image file, which both in image_dir and anno_path."""
image_files = []
@@ -404,6 +424,7 @@ def filter_valid_data(image_dir, anno_path):
image_files.append(image_path)
return image_files, image_anno_dict
def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8):
"""Create MindRecord file."""
mindrecord_dir = config.mindrecord_dir
@@ -435,7 +456,7 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank_id,
num_parallel_workers=1, shuffle=is_training)
decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode, num_parallel_workers=1)
ds = ds.map(operations=decode, input_columns=["image"], num_parallel_workers=1)
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))
hwc_to_chw = C.HWC2CHW()
@@ -447,38 +468,39 @@
type_cast3 = CC.TypeCast(mstype.bool_)
if is_training:
ds = ds.map(input_columns=["image", "annotation"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers)
num_parallel_workers=num_parallel_workers)
flip = (np.random.rand() < config.flip_ratio)
if flip:
ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0, horizontally_op],
ds = ds.map(operations=[normalize_op, type_cast0, horizontally_op], input_columns=["image"],
num_parallel_workers=12)
ds = ds.map(input_columns=["image", "image_shape", "box", "label", "valid_num"],
operations=flipped_generation, num_parallel_workers=num_parallel_workers)
ds = ds.map(operations=flipped_generation,
input_columns=["image", "image_shape", "box", "label", "valid_num"],
num_parallel_workers=num_parallel_workers)
else:
ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0],
ds = ds.map(operations=[normalize_op, type_cast0], input_columns=["image"],
num_parallel_workers=12)
ds = ds.map(input_columns=["image"], operations=[hwc_to_chw, type_cast1],
ds = ds.map(operations=[hwc_to_chw, type_cast1], input_columns=["image"],
num_parallel_workers=12)
else:
ds = ds.map(input_columns=["image", "annotation"],
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
operations=compose_map_func,
num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=[normalize_op, hwc_to_chw, type_cast1],
ds = ds.map(operations=[normalize_op, hwc_to_chw, type_cast1], input_columns=["image"],
num_parallel_workers=24)
# transpose_column from python to c
ds = ds.map(input_columns=["image_shape"], operations=[type_cast1])
ds = ds.map(input_columns=["box"], operations=[type_cast1])
ds = ds.map(input_columns=["label"], operations=[type_cast2])
ds = ds.map(input_columns=["valid_num"], operations=[type_cast3])
ds = ds.map(operations=[type_cast1], input_columns=["image_shape"])
ds = ds.map(operations=[type_cast1], input_columns=["box"])
ds = ds.map(operations=[type_cast2], input_columns=["label"])
ds = ds.map(operations=[type_cast3], input_columns=["valid_num"])
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
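
Throughout this file, output_columns names the columns the mapped callable returns and column_order fixes the final column layout of the dataset (both are map keywords in this MindSpore version). A self-contained toy showing a map that turns one column into two:

import numpy as np
import mindspore.dataset as ds

def gen():
    for i in range(4):
        yield (np.array(i, dtype=np.int32),)

def split_fn(x):
    # one input column in, two output columns out
    return x, x * 2

data = ds.GeneratorDataset(gen, column_names=["x"])
data = data.map(operations=split_fn, input_columns=["x"],
                output_columns=["x", "x2"], column_order=["x", "x2"])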

@@ -55,8 +55,8 @@ def create_dataset_cifar10(data_home, repeat_num=1, training=True):
c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]
# apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op)
data_set = data_set.map(input_columns="image", operations=c_trans)
data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(operations=c_trans, input_columns="image")
# apply batch operations
data_set = data_set.batch(batch_size=cifar_cfg.batch_size, drop_remainder=True)

@@ -60,8 +60,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
C.HWC2CHW()
]
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums)
# apply batch operations
ds = ds.batch(cfg.batch_size, drop_remainder=True)
# apply dataset repeat operation

@@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
buffer_size = 10000

@@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
buffer_size = 10000

@@ -498,24 +498,24 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id
num_parallel_workers=4, shuffle=is_training)
decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode)
ds = ds.map(operations=decode, input_columns=["image"])
compose_map_func = (lambda image, annotation, mask, mask_shape:
preprocess_fn(image, annotation, mask, mask_shape, is_training))
if is_training:
ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
operations=compose_map_func,
python_multiprocessing=False,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
else:
ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
operations=compose_map_func,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)

@@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, config, repeat_num=1):
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply shuffle operations
ds = ds.shuffle(buffer_size=buffer_size)

@@ -89,8 +89,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=16)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=16)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -152,7 +152,7 @@ def create_dataset_py(dataset_path, do_train, config, device_target, repeat_num=
compose = P2.Compose(trans)
ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

@@ -70,8 +70,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply shuffle operations
ds = ds.shuffle(buffer_size=buffer_size)

@@ -61,8 +61,8 @@ def create_dataset(dataset_path, config, do_train, repeat_num=1):
C.HWC2CHW()
]
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=config.work_nums)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=config.work_nums)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=config.work_nums)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=config.work_nums)
# apply batch operations
ds = ds.batch(config.batch_size, drop_remainder=True)
# apply dataset repeat operation

@@ -22,6 +22,7 @@ import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2
from mindspore.communication.management import init, get_rank, get_group_size
def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
"""
create a train or evaluate cifar10 dataset for resnet50
@@ -65,8 +66,8 @@ def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target=
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -126,8 +127,8 @@ def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target=
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -180,8 +181,8 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -190,6 +191,7 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=
return ds
def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
"""
create a train or eval imagenet2012 dataset for se-resnet50
@@ -233,8 +235,8 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target=
]
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=12, operations=trans)
ds = ds.map(input_columns="label", num_parallel_workers=12, operations=type_cast_op)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=12)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -244,6 +246,7 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target=
return ds
def _get_rank_info():
"""
get rank size and rank id

@@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -147,7 +147,7 @@ def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, targe
trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]
compose = P2.Compose(trans)
ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

@@ -75,8 +75,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=num_parallels, operations=trans)
ds = ds.map(input_columns="label", num_parallel_workers=num_parallels, operations=type_cast_op)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=num_parallels)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallels)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

@@ -25,6 +25,7 @@ from src.utils.sampler import DistributedSampler
ImageFile.LOAD_TRUNCATED_IMAGES = True
class TxtDataset():
"""
create txt dataset.
@@ -33,6 +34,7 @@ class TxtDataset():
Returns:
de_dataset.
"""
def __init__(self, root, txt_name):
super(TxtDataset, self).__init__()
self.imgs = []
@@ -142,10 +144,10 @@ def classification_dataset(data_dir, image_size, per_batch_size, max_epoch, rank
sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)
de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=num_parallel_workers,
operations=transform_img)
de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=num_parallel_workers,
operations=transform_label)
de_dataset = de_dataset.map(operations=transform_img, input_columns="image",
num_parallel_workers=num_parallel_workers)
de_dataset = de_dataset.map(operations=transform_label, input_columns="label",
num_parallel_workers=num_parallel_workers)
columns_to_project = ["image", "label"]
de_dataset = de_dataset.project(columns=columns_to_project)

@@ -72,8 +72,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
]
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums)
# apply batch operations
ds = ds.batch(cfg.batch_size, drop_remainder=True)

@@ -34,6 +34,7 @@ def _rand(a=0., b=1.):
"""Generate random."""
return np.random.rand() * (b - a) + a
def get_imageId_from_fileName(filename):
"""Get imageID from fileName"""
try:
@@ -42,6 +43,7 @@ def get_imageId_from_fileName(filename):
except:
raise NotImplementedError('Filename %s is supposed to be an integer.' % (filename))
def random_sample_crop(image, boxes):
"""Random Crop the image and boxes"""
height, width, _ = image.shape
@@ -103,6 +105,7 @@ def random_sample_crop(image, boxes):
def preprocess_fn(img_id, image, box, is_training):
"""Preprocess function for dataset."""
def _infer_data(image, input_shape):
img_h, img_w, _ = image.shape
input_h, input_w = input_shape
@@ -150,6 +153,7 @@ def preprocess_fn(img_id, image, box, is_training):
box, label, num_match = ssd_bboxes_encode(box)
return image, box, label, num_match
return _data_aug(image, box, is_training, image_size=config.img_shape)
@@ -244,6 +248,7 @@ def create_voc_label(is_training):
return images, image_files_dict, image_anno_dict
def create_coco_label(is_training):
"""Get image path and annotation from COCO."""
from pycocotools.coco import COCO
@@ -391,9 +396,10 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num
ds = de.MindDataset(mindrecord_file, columns_list=["img_id", "image", "annotation"], num_shards=device_num,
shard_id=rank, num_parallel_workers=num_parallel_workers, shuffle=is_training)
decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode)
ds = ds.map(operations=decode, input_columns=["image"])
change_swap_op = C.HWC2CHW()
normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255])
normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
color_adjust_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
compose_map_func = (lambda img_id, image, annotation: preprocess_fn(img_id, image, annotation, is_training))
if is_training:
@@ -402,11 +408,11 @@
else:
output_columns = ["img_id", "image", "image_shape"]
trans = [normalize_op, change_swap_op]
ds = ds.map(input_columns=["img_id", "image", "annotation"],
ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"],
output_columns=output_columns, column_order=output_columns,
operations=compose_map_func, python_multiprocessing=is_training,
python_multiprocessing=is_training,
num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training,
ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=is_training,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
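
preprocess_fn here is a Python callable, so the rewritten map also threads python_multiprocessing through (enabled only during training in this diff) to spread the Python work across worker processes. A minimal sketch of that pattern with a stand-in op:

import numpy as np
import mindspore.dataset as ds

def gen():
    for i in range(8):
        yield (np.full((300, 300, 3), i, dtype=np.float32),)

def py_op(image):
    # stand-in for an expensive Python-side preprocess such as preprocess_fn
    return image * 2.0

data = ds.GeneratorDataset(gen, column_names=["image"])
data = data.map(operations=py_op, input_columns=["image"],
                python_multiprocessing=True, num_parallel_workers=4)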

@@ -54,8 +54,8 @@ def vgg_create_dataset(data_home, image_size, batch_size, rank_id=0, rank_size=1
changeswap_op]
# apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op)
data_set = data_set.map(input_columns="image", operations=c_trans)
data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(operations=c_trans, input_columns="image")
# apply repeat operations
data_set = data_set.repeat(repeat_num)
@@ -157,8 +157,8 @@ def classification_dataset(data_dir, image_size, per_batch_size, rank=0, group_s
sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)
de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=8, operations=transform_img)
de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=8, operations=transform_label)
de_dataset = de_dataset.map(operations=transform_img, input_columns="image", num_parallel_workers=8)
de_dataset = de_dataset.map(operations=transform_label, input_columns="label", num_parallel_workers=8)
columns_to_project = ["image", "label"]
de_dataset = de_dataset.project(columns=columns_to_project)

@@ -90,8 +90,8 @@ def create_dataset(dataset_path, batch_size=1, num_shards=1, shard_id=0, device_
label_trans = [
c.TypeCast(mstype.int32)
]
ds = ds.map(input_columns=["image"], num_parallel_workers=8, operations=image_trans)
ds = ds.map(input_columns=["label"], num_parallel_workers=8, operations=label_trans)
ds = ds.map(operations=image_trans, input_columns=["image"], num_parallel_workers=8)
ds = ds.map(operations=label_trans, input_columns=["label"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)
return ds

@@ -176,11 +176,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"],
sampler=distributed_sampler)
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
ds = ds.map(input_columns=["image", "img_id"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
operations=compose_map_func, num_parallel_workers=8)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
num_parallel_workers=8)
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(max_epoch)

@@ -173,11 +173,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"],
sampler=distributed_sampler)
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
ds = ds.map(input_columns=["image", "img_id"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
operations=compose_map_func, num_parallel_workers=8)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
num_parallel_workers=8)
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(max_epoch)

@@ -296,21 +296,21 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=1, device_num=
ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank,
num_parallel_workers=num_parallel_workers, shuffle=is_training)
decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode)
ds = ds.map(operations=decode, input_columns=["image"])
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))
if is_training:
hwc_to_chw = C.HWC2CHW()
ds = ds.map(input_columns=["image", "annotation"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers)
num_parallel_workers=num_parallel_workers)
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
else:
ds = ds.map(input_columns=["image", "annotation"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "annotation"],
column_order=["image", "image_shape", "annotation"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers)
num_parallel_workers=num_parallel_workers)
return ds

@@ -112,12 +112,12 @@ def get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''):
"masked_lm_weights",
"next_sentence_labels"])
type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.repeat(repeat_count)
# apply batch operations
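
The six identical casts above are mechanical; a loop over the column names expresses the same pipeline (a style note reusing ds, C, and mstype from the hunk, not a change this diff makes):

type_cast_op = C.TypeCast(mstype.int32)
for col in ["segment_ids", "input_mask", "input_ids",
            "masked_lm_ids", "masked_lm_positions", "next_sentence_labels"]:
    ds = ds.map(operations=type_cast_op, input_columns=col)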

Some files were not shown because too many files have changed in this diff.