minddata iterator output ms_tensor

pull/5801/head
xiefangqi 4 years ago
parent a778868a5a
commit 9b3c33e157

@@ -394,7 +394,7 @@ class Dataset:
logger.error("func must be a function.")
raise TypeError("func must be a function.")
-for row_data in self:
+for row_data in self.create_tuple_iterator(output_numpy=True):
if dataset is None:
dataset = func(row_data)
else:
@@ -1133,7 +1133,7 @@ class Dataset:
return SaveOp(self).save(file_names, file_type)
-def create_tuple_iterator(self, columns=None, num_epochs=-1):
+def create_tuple_iterator(self, columns=None, num_epochs=-1, output_numpy=False):
"""
Create an Iterator over the dataset. The data retrieved will be a list of ndarray of data.
@@ -1143,8 +1143,11 @@
Args:
columns (list[str], optional): List of columns to be used to specify the order of columns
(default=None, means all columns).
-num_epochs (int, optional): max epochs that iterator can be iteratered,
-    if num_epochs = -1, iterator can be iteratered infinit epochs (default=-1)
+num_epochs (int, optional): Maximum number of epochs for the iterator to iterate over;
+    if num_epochs = -1, the iterator iterates over an unlimited number of epochs (default=-1).
+output_numpy (bool, optional): Whether to output data as NumPy ndarray;
+    if output_numpy=False, the iterator outputs MSTensor (default=False).
Returns:
Iterator, list of ndarray.
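With this change, rows from the tuple iterator arrive as mindspore Tensor (MSTensor) by default, and output_numpy=True restores the previous ndarray behavior. A minimal sketch of both modes, assuming a small GeneratorDataset as the source:

    import numpy as np
    import mindspore.dataset as ds

    def gen():
        for i in range(3):
            yield (np.array([i], dtype=np.int32),)

    data = ds.GeneratorDataset(gen, column_names=["col1"])

    # New default: each row is a list of mindspore Tensor.
    for row in data.create_tuple_iterator(num_epochs=1):
        print(type(row[0]))

    # output_numpy=True keeps the pre-change behavior: numpy.ndarray.
    for row in data.create_tuple_iterator(num_epochs=1, output_numpy=True):
        print(type(row[0]))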
@@ -1161,9 +1164,9 @@
"""
if self._noop_mode():
return DummyIterator(self, 'tuple')
-return TupleIterator(self, columns, num_epochs)
+return TupleIterator(self, columns, num_epochs, output_numpy)
-def create_dict_iterator(self, num_epochs=-1):
+def create_dict_iterator(self, num_epochs=-1, output_numpy=False):
"""
Create an Iterator over the dataset.
@@ -1171,8 +1174,10 @@
of the columns in the dictionary may not be the same as the original order.
Args:
-num_epochs (int, optional): max epochs that iterator can be iteratered,
-    if num_epochs = -1, iterator can be iteratered infinit epochs (default=-1)
+num_epochs (int, optional): Maximum number of epochs for the iterator to iterate over;
+    if num_epochs = -1, the iterator iterates over an unlimited number of epochs (default=-1).
+output_numpy (bool, optional): Whether to output data as NumPy ndarray;
+    if output_numpy=False, the iterator outputs MSTensor (default=False).
Returns:
Iterator, dictionary of column_name-ndarray pair.
@@ -1190,7 +1195,7 @@
"""
if self._noop_mode():
return DummyIterator(self, 'dict')
-return DictIterator(self, num_epochs)
+return DictIterator(self, num_epochs, output_numpy)
def __iter__(self):
"""Create an Iterator over the dataset."""
@@ -1617,7 +1622,7 @@ class BucketBatchByLengthDataset(DatasetOp):
"""
if self.dataset_size is None:
num_rows = 0
-for _ in self.create_dict_iterator(num_epochs=1):
+for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True):
num_rows += 1
self.dataset_size = num_rows
return self.dataset_size
@@ -2163,7 +2168,7 @@ class FilterDataset(DatasetOp):
"""
if self.dataset_size is None:
num_rows = 0
-for _ in self.create_dict_iterator(num_epochs=1):
+for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True):
num_rows += 1
self.dataset_size = num_rows
return self.dataset_size
@@ -2400,7 +2405,7 @@ class ConcatDataset(DatasetOp):
"""
if self.dataset_size is None:
num_rows = 0
-for _ in self.create_dict_iterator(num_epochs=1):
+for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True):
num_rows += 1
self.dataset_size = num_rows
return self.dataset_size
@@ -3495,7 +3500,7 @@ class GeneratorDataset(MappableDataset):
self.dataset_size = rows_from_sampler
else:
num_rows = 0
-for _ in self.create_dict_iterator(num_epochs=1):
+for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True):
num_rows += 1
self.dataset_size = num_rows
return self.dataset_size
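The four get_dataset_size call sites above share one counting idiom; as a standalone sketch (the helper name _count_rows is hypothetical):

    def _count_rows(dataset):
        # Drain one epoch to count rows. output_numpy=True skips the per-row
        # Tensor wrapping, which would be pure overhead when only the count matters.
        num_rows = 0
        for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            num_rows += 1
        return num_rows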

@@ -67,8 +67,9 @@ class Iterator:
dataset: Dataset to be iterated over
"""
-def __init__(self, dataset, num_epochs=-1):
+def __init__(self, dataset, num_epochs=-1, output_numpy=False):
self.num_epochs = num_epochs
+self.output_numpy = output_numpy
ITERATORS_LIST.append(weakref.ref(self))
# create a copy of tree and work on it.
self.dataset = copy.deepcopy(dataset)
@@ -305,8 +306,8 @@ class DictIterator(Iterator):
"""
The derived class of Iterator with dict type.
"""
-def __init__(self, dataset, num_epochs=-1):
-super().__init__(dataset, num_epochs)
+def __init__(self, dataset, num_epochs=-1, output_numpy=False):
+super().__init__(dataset, num_epochs, output_numpy)
self.depipeline.LaunchTreeExec()
def check_node_type(self, node):
@@ -323,7 +324,9 @@
Dict, the next record in the dataset.
"""
-return {k: v.as_array() for k, v in self.depipeline.GetNextAsMap().items()}
+if self.output_numpy:
+    return {k: v.as_array() for k, v in self.depipeline.GetNextAsMap().items()}
+return {k: Tensor(v.as_array()) for k, v in self.depipeline.GetNextAsMap().items()}
class TupleIterator(Iterator):
@@ -333,12 +336,12 @@ class TupleIterator(Iterator):
def check_node_type(self, node):
pass
-def __init__(self, dataset, columns=None, num_epochs=-1):
+def __init__(self, dataset, columns=None, num_epochs=-1, output_numpy=False):
if columns is not None:
if not isinstance(columns, list):
columns = [columns]
dataset = dataset.project(columns)
-super().__init__(dataset, num_epochs)
+super().__init__(dataset, num_epochs, output_numpy)
self.depipeline.LaunchTreeExec()
def __iter__(self):
@@ -352,7 +355,9 @@ class TupleIterator(Iterator):
List, the next record in the dataset.
"""
-return [t.as_array() for t in self.depipeline.GetNextAsList()]
+if self.output_numpy:
+    return [t.as_array() for t in self.depipeline.GetNextAsList()]
+return [Tensor(t.as_array()) for t in self.depipeline.GetNextAsList()]
class DummyIterator():

@@ -18,8 +18,7 @@ import os
from mindspore._checkparam import check_bool, check_int
from .. import context, nn
-from ._utils import _exec_datagraph, _get_types_and_shapes, _to_tensor, \
-    _construct_tensor_list
+from ._utils import _exec_datagraph, _get_types_and_shapes, _construct_tensor_list
from ..nn.wrap import GetNextSingleOp
from ..parallel._utils import _get_device_num, _get_global_rank, _need_to_full, _to_full_shapes
from ..ops import operations as P
@@ -297,4 +296,4 @@ class _DatasetIterNormal:
def __next__(self):
data = self.iter.__next__()
-return _to_tensor(data)
+return data

@@ -19,7 +19,7 @@ import argparse
import time
import numpy as np
from pycocotools.coco import COCO
-from mindspore import context, Tensor
+from mindspore import context
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.common import set_seed
@@ -68,7 +68,7 @@ def FasterRcnn_eval(dataset_path, ckpt_path, ann_file):
start = time.time()
# run net
-output = net(Tensor(img_data), Tensor(img_metas), Tensor(gt_bboxes), Tensor(gt_labels), Tensor(gt_num))
+output = net(img_data, img_metas, gt_bboxes, gt_labels, gt_num)
end = time.time()
print("Iter {} cost time {}".format(eval_iter, end - start))

@@ -57,7 +57,7 @@ def MaskRcnn_eval(dataset_path, ckpt_path, ann_file):
print("total images num: ", total)
print("Processing, please wait a moment.")
max_num = 128
-for data in ds.create_dict_iterator():
+for data in ds.create_dict_iterator(output_numpy=True):
eval_iter = eval_iter + 1
img_data = data['image']

@@ -109,7 +109,7 @@ def extract_features(net, dataset_path, config):
config=config,
repeat_num=1)
step_size = dataset.get_dataset_size()
-pbar = tqdm(list(dataset.create_dict_iterator()))
+pbar = tqdm(list(dataset.create_dict_iterator(output_numpy=True)))
model = Model(net)
i = 0
for data in pbar:
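Scripts whose loop bodies do NumPy-side work (pycocotools, materializing the iterator into a list for tqdm, plain printing) opt back into ndarray output instead. A minimal sketch, assuming an "image" column:

    for data in ds.create_dict_iterator(output_numpy=True):
        img = data["image"]  # numpy.ndarray, so NumPy post-processing works unchanged
        print(img.shape, img.dtype)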

@@ -146,7 +146,7 @@ def test(cloud_args=None):
per_batch_size=args.per_batch_size,
max_epoch=1, rank=args.rank, group_size=args.group_size,
mode='eval')
-eval_dataloader = de_dataset.create_tuple_iterator()
+eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True)
network = get_network(args.backbone, args.num_classes, platform=args.platform)
if network is None:
raise NotImplementedError('not implement {}'.format(args.backbone))

@@ -44,7 +44,7 @@ def ssd_eval(dataset_path, ckpt_path):
print("\n========================================\n")
print("total images num: ", total)
print("Processing, please wait a moment.")
-for data in ds.create_dict_iterator():
+for data in ds.create_dict_iterator(output_numpy=True):
img_id = data['img_id']
img_np = data['image']
image_shape = data['image_shape']

@@ -159,7 +159,7 @@ def test(cloud_args=None):
for model in args.models:
dataset = classification_dataset(args.data_path, args.image_size, args.per_batch_size, mode='eval')
-eval_dataloader = dataset.create_tuple_iterator()
+eval_dataloader = dataset.create_tuple_iterator(output_numpy=True)
network = vgg16(args.num_classes, args, phase="test")
# pre_trained

@@ -300,10 +300,10 @@ def test():
input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
args.logger.info('Start inference....')
for i, data in enumerate(ds.create_dict_iterator()):
image = Tensor(data["image"])
image = data["image"]
image_shape = Tensor(data["image_shape"])
image_id = Tensor(data["img_id"])
image_shape = data["image_shape"]
image_id = data["img_id"]
prediction = network(image, input_shape)
output_big, output_me, output_small = prediction

@@ -299,7 +299,7 @@ def train():
old_progress = -1
t_end = time.time()
-data_loader = ds.create_dict_iterator()
+data_loader = ds.create_dict_iterator(output_numpy=True)
for i, data in enumerate(data_loader):
images = data["image"]

@@ -306,10 +306,10 @@ def test():
input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
args.logger.info('Start inference....')
for i, data in enumerate(ds.create_dict_iterator()):
image = Tensor(data["image"])
image = data["image"]
image_shape = Tensor(data["image_shape"])
image_id = Tensor(data["img_id"])
image_shape = data["image_shape"]
image_id = data["img_id"]
prediction = network(image, input_shape)
output_big, output_me, output_small = prediction

@@ -303,7 +303,7 @@ def train():
old_progress = -1
t_end = time.time()
-data_loader = ds.create_dict_iterator()
+data_loader = ds.create_dict_iterator(output_numpy=True)
shape_record = ShapeRecord()
for i, data in enumerate(data_loader):

@@ -44,7 +44,7 @@ def yolo_eval(dataset_path, ckpt_path):
print("\n========================================\n")
print("total images num: ", total)
print("Processing, please wait a moment.")
-for data in ds.create_dict_iterator():
+for data in ds.create_dict_iterator(output_numpy=True):
img_np = data['image']
image_shape = data['image_shape']
annotation = data['annotation']

@@ -52,7 +52,7 @@ def train_and_eval():
eval_class = BGCFEvaluate(parser, train_graph, test_graph, parser.Ks)
-itr = train_ds.create_dict_iterator(parser.num_epoch)
+itr = train_ds.create_dict_iterator(parser.num_epoch, output_numpy=True)
num_iter = int(num_pairs / parser.batch_pairs)
for _epoch in range(1, parser.num_epoch + 1):

@@ -29,7 +29,6 @@ from mindspore import context
from mindspore import log as logger
from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell
from mindspore.nn.optim import AdamWeightDecay, Lamb, Momentum
-from mindspore.common.tensor import Tensor
from mindspore.train.model import Model
from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor
from mindspore.train.serialization import load_checkpoint, load_param_into_net
@@ -123,7 +122,7 @@ def do_eval(dataset=None, network=None, num_class=2, assessment_method="accuracy
for data in dataset.create_dict_iterator():
input_data = []
for i in columns_list:
-input_data.append(Tensor(data[i]))
+input_data.append(data[i])
input_ids, input_mask, token_type_id, label_ids = input_data
logits = model.predict(input_ids, input_mask, token_type_id, label_ids)
callback.update(logits, label_ids)
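The same simplification repeats across the BERT-family eval loops; condensed, the pattern is (columns_list as in the scripts):

    for data in dataset.create_dict_iterator():
        # data[i] is already a mindspore Tensor, so no wrapping is needed.
        input_ids, input_mask, token_type_id, label_ids = [data[i] for i in columns_list]
        logits = model.predict(input_ids, input_mask, token_type_id, label_ids)
        callback.update(logits, label_ids)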

@@ -30,7 +30,6 @@ from mindspore import context
from mindspore import log as logger
from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell
from mindspore.nn.optim import AdamWeightDecay, Lamb, Momentum
-from mindspore.common.tensor import Tensor
from mindspore.train.model import Model
from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor
from mindspore.train.serialization import load_checkpoint, load_param_into_net
@@ -132,7 +131,7 @@ def do_eval(dataset=None, network=None, use_crf="", num_class=2, assessment_meth
for data in dataset.create_dict_iterator():
input_data = []
for i in columns_list:
-input_data.append(Tensor(data[i]))
+input_data.append(data[i])
input_ids, input_mask, token_type_id, label_ids = input_data
logits = model.predict(input_ids, input_mask, token_type_id, label_ids)
callback.update(logits, label_ids)

@@ -112,7 +112,7 @@ def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="",
for data in dataset.create_dict_iterator():
input_data = []
for i in columns_list:
-input_data.append(Tensor(data[i]))
+input_data.append(data[i])
input_ids, input_mask, segment_ids, unique_ids = input_data
start_positions = Tensor([1], mstype.float32)
end_positions = Tensor([1], mstype.float32)

@@ -107,7 +107,7 @@ def transformer_infer(config, dataset):
probs = []
source_sentences = []
target_sentences = []
-for batch in dataset.create_dict_iterator():
+for batch in dataset.create_dict_iterator(output_numpy=True):
source_sentences.append(batch["source_eos_ids"])
target_sentences.append(batch["target_eos_ids"])
@@ -232,7 +232,7 @@ def transformer_infer_ppl(config, dataset):
lengths = []
source_sentences = []
target_sentences = []
-for batch in dataset.create_dict_iterator():
+for batch in dataset.create_dict_iterator(output_numpy=True):
source_sentences.append(batch["source_eos_ids"])
target_sentences.append(batch["target_eos_ids"])

@@ -19,7 +19,6 @@ import os
import re
import argparse
import mindspore.common.dtype as mstype
-from mindspore import Tensor
from mindspore import context
from mindspore.train.model import Model
from mindspore.train.callback import TimeMonitor
@@ -282,7 +281,7 @@ def do_eval_standalone():
for data in eval_dataset.create_dict_iterator():
input_data = []
for i in columns_list:
-input_data.append(Tensor(data[i]))
+input_data.append(data[i])
input_ids, input_mask, token_type_id, label_ids = input_data
logits = eval_model(input_ids, token_type_id, input_mask)
callback.update(logits[3], label_ids)

@@ -96,7 +96,7 @@ class EvalCallBack(Callback):
for data in self.dataset.create_dict_iterator():
input_data = []
for i in columns_list:
-input_data.append(Tensor(data[i]))
+input_data.append(data[i])
input_ids, input_mask, token_type_id, label_ids = input_data
self.network.set_train(False)
logits = self.network(input_ids, token_type_id, input_mask)

@@ -113,7 +113,7 @@ def run_transformer_eval():
predictions = []
source_sents = []
target_sents = []
-for batch in dataset.create_dict_iterator():
+for batch in dataset.create_dict_iterator(output_numpy=True):
source_sents.append(batch["source_eos_ids"])
target_sents.append(batch["target_eos_ids"])
source_ids = Tensor(batch["source_eos_ids"], mstype.int32)
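This loop is the mixed case: it requests output_numpy=True because the batches are accumulated as ndarrays for later text processing, then wraps only the network inputs explicitly to pin the dtype. Condensed:

    for batch in dataset.create_dict_iterator(output_numpy=True):
        source_sents.append(batch["source_eos_ids"])                 # keep as numpy.ndarray
        source_ids = Tensor(batch["source_eos_ids"], mstype.int32)   # explicit dtype for the net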

@@ -22,7 +22,7 @@ def create_dataset(data_file):
num_parallel_workers=num_readers,
shuffle=True)
index = 0
-for item in data_set.create_dict_iterator():
+for item in data_set.create_dict_iterator(output_numpy=True):
print("example {}: {}".format(index, item))
index += 1
if index % 1000 == 0:

@@ -28,7 +28,7 @@ args = parser.parse_args()
data_set = ds.MindDataset(args.path)
num_iter = 0
-for item in data_set.create_dict_iterator():
+for item in data_set.create_dict_iterator(output_numpy=True):
print(item)
num_iter += 1
print("Total items # is {}".format(num_iter))

@@ -22,7 +22,7 @@ def create_dataset(data_file):
num_parallel_workers=num_readers,
shuffle=True)
index = 0
-for item in data_set.create_dict_iterator():
+for item in data_set.create_dict_iterator(output_numpy=True):
print("example {}: {}".format(index, item))
index += 1
if index % 1000 == 0:
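The MindRecord inspection scripts all print raw records, so they keep ndarray output; a self-contained sketch (the file path is a placeholder):

    import mindspore.dataset as ds

    data_set = ds.MindDataset("/path/to/data.mindrecord")
    for index, item in enumerate(data_set.create_dict_iterator(output_numpy=True)):
        print("example {}: {}".format(index, item))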

Some files were not shown because too many files have changed in this diff.