Add num_epochs to non-sink training

pull/6018/head
hesham 4 years ago
parent e69d868016
commit 9cee0d2143
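
All of the hunks below apply the same change: every host-fed (non-sink) dict or tuple iterator is now created with an explicit num_epochs argument, so the data pipeline knows how many passes it has to serve instead of staying open indefinitely (the stubbed default of -1, visible in the test helper at the end of this diff, means "unbounded"). A minimal sketch of the call pattern, assuming a MindSpore dataset object ds and a hypothetical process() helper:

    # Single-pass loop: num_epochs=1 bounds the iterator to one epoch
    # so it can shut down cleanly once the loop finishes.
    for data in ds.create_dict_iterator(num_epochs=1):
        process(data['image'], data['label'])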

@@ -63,7 +63,7 @@ class SVI:
         for _ in range(1, epochs+1):
             train_loss = 0
             dataset_size = 0
-            for data in train_dataset.create_dict_iterator():
+            for data in train_dataset.create_dict_iterator(num_epochs=1):
                 x = Tensor(data['image'], dtype=mstype.float32)
                 y = Tensor(data['label'], dtype=mstype.int32)
                 dataset_size += len(x)

@@ -145,7 +145,7 @@ class DatasetHelper:
             self.iter = iterclass(dataset, sink_size, epoch_num)
         else:
             iterclass = _DatasetIterNormal
-            self.iter = iterclass(dataset)
+            self.iter = iterclass(dataset, epoch_num=epoch_num)
 
     def __iter__(self):
         return self.iter.__iter__()
@@ -290,11 +290,12 @@ class _DatasetIterPSLite(_DatasetIter):
 class _DatasetIterNormal:
     """Iter for normal(non sink) mode, feed the data from host."""
-    def __init__(self, dataset):
+
+    def __init__(self, dataset, epoch_num=-1):
         self.dataset = dataset
         self.device_num = _get_device_num()
         self.global_rank = _get_global_rank()
-        self.iter = self.dataset.create_tuple_iterator()
+        self.iter = self.dataset.create_tuple_iterator(num_epochs=epoch_num)
 
     def __iter__(self):
         return self

@@ -460,7 +460,8 @@ class Model:
                                                    is_train=True,
                                                    phase='train',
                                                    dataset=train_dataset,
-                                                   dataset_sink_mode=False)
+                                                   dataset_sink_mode=False,
+                                                   epoch_num=epoch)
         cb_params.cur_step_num = 0
         run_context = RunContext(cb_params)
         list_callback.begin(run_context)
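
Taken together, the first four hunks thread the caller's epoch count from Model (non-sink training) through DatasetHelper into _DatasetIterNormal and finally into create_tuple_iterator. A self-contained toy sketch of that plumbing (illustrative names only, not MindSpore code):

    class ToyDataset:
        """Stands in for a dataset exposing a bounded tuple iterator."""
        def __init__(self, samples):
            self.samples = samples

        def create_tuple_iterator(self, num_epochs=-1):
            # num_epochs=-1 means "keep serving"; a positive value bounds
            # the iterator to that many passes over the data.
            def gen():
                epoch = 0
                while num_epochs < 0 or epoch < num_epochs:
                    yield from self.samples
                    epoch += 1
            return gen()

    class ToyIterNormal:
        """Mirrors _DatasetIterNormal: host-fed (non-sink) iteration."""
        def __init__(self, dataset, epoch_num=-1):
            self.iter = dataset.create_tuple_iterator(num_epochs=epoch_num)

        def __iter__(self):
            return self.iter

    epochs = 2
    it = ToyIterNormal(ToyDataset([(1, 0), (2, 1)]), epoch_num=epochs)
    print(list(it))  # exactly two passes, then the iterator terminates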

@@ -57,7 +57,7 @@ def FasterRcnn_eval(dataset_path, ckpt_path, ann_file):
     print("total images num: ", total)
     print("Processing, please wait a moment.")
     max_num = 128
-    for data in ds.create_dict_iterator():
+    for data in ds.create_dict_iterator(num_epochs=1):
         eval_iter = eval_iter + 1
 
         img_data = data['image']

@@ -57,7 +57,7 @@ def MaskRcnn_eval(dataset_path, ckpt_path, ann_file):
     print("total images num: ", total)
     print("Processing, please wait a moment.")
     max_num = 128
-    for data in ds.create_dict_iterator(output_numpy=True):
+    for data in ds.create_dict_iterator(output_numpy=True, num_epochs=1):
         eval_iter = eval_iter + 1
 
         img_data = data['image']

@@ -200,7 +200,7 @@ def test(cloud_args=None):
                                              per_batch_size=args.per_batch_size,
                                              max_epoch=1, rank=args.rank, group_size=args.group_size,
                                              mode='eval')
-        eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True)
+        eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True, num_epochs=1)
         network = get_network(args.backbone, num_classes=args.num_classes, platform=args.platform)
         if network is None:
             raise NotImplementedError('not implement {}'.format(args.backbone))

@@ -44,7 +44,7 @@ def ssd_eval(dataset_path, ckpt_path):
     print("\n========================================\n")
     print("total images num: ", total)
     print("Processing, please wait a moment.")
-    for data in ds.create_dict_iterator(output_numpy=True):
+    for data in ds.create_dict_iterator(output_numpy=True, num_epochs=1):
         img_id = data['img_id']
         img_np = data['image']
         image_shape = data['image_shape']

@@ -159,7 +159,7 @@ def test(cloud_args=None):
     for model in args.models:
         dataset = classification_dataset(args.data_path, args.image_size, args.per_batch_size, mode='eval')
-        eval_dataloader = dataset.create_tuple_iterator(output_numpy=True)
+        eval_dataloader = dataset.create_tuple_iterator(output_numpy=True, num_epochs=1)
         network = vgg16(args.num_classes, args, phase="test")
 
         # pre_trained

@@ -299,7 +299,7 @@ def test():
     input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
     args.logger.info('Start inference....')
-    for i, data in enumerate(ds.create_dict_iterator()):
+    for i, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
         image = data["image"]
         image_shape = data["image_shape"]

@@ -239,7 +239,7 @@ def train():
     old_progress = -1
     t_end = time.time()
-    data_loader = ds.create_dict_iterator(output_numpy=True)
+    data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)
 
     for i, data in enumerate(data_loader):
         images = data["image"]

@@ -305,7 +305,7 @@ def test():
     input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
     args.logger.info('Start inference....')
-    for i, data in enumerate(ds.create_dict_iterator()):
+    for i, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
         image = data["image"]
         image_shape = data["image_shape"]

@@ -224,7 +224,7 @@ def train():
     old_progress = -1
     t_end = time.time()
-    data_loader = ds.create_dict_iterator(output_numpy=True)
+    data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)
 
     shape_record = ShapeRecord()
     for i, data in enumerate(data_loader):

@@ -44,7 +44,7 @@ def yolo_eval(dataset_path, ckpt_path):
     print("\n========================================\n")
     print("total images num: ", total)
     print("Processing, please wait a moment.")
-    for data in ds.create_dict_iterator(output_numpy=True):
+    for data in ds.create_dict_iterator(output_numpy=True, num_epochs=1):
         img_np = data['image']
         image_shape = data['image_shape']
         annotation = data['annotation']

@@ -119,7 +119,7 @@ def do_eval(dataset=None, network=None, num_class=2, assessment_method="accuracy
         raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]")
 
     columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
-    for data in dataset.create_dict_iterator():
+    for data in dataset.create_dict_iterator(num_epochs=1):
         input_data = []
         for i in columns_list:
             input_data.append(data[i])

@@ -128,7 +128,7 @@ def do_eval(dataset=None, network=None, use_crf="", num_class=2, assessment_meth
         raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]")
 
     columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
-    for data in dataset.create_dict_iterator():
+    for data in dataset.create_dict_iterator(num_epochs=1):
         input_data = []
         for i in columns_list:
             input_data.append(data[i])

@@ -109,7 +109,7 @@ def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="",
     output = []
     RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])
     columns_list = ["input_ids", "input_mask", "segment_ids", "unique_ids"]
-    for data in dataset.create_dict_iterator():
+    for data in dataset.create_dict_iterator(num_epochs=1):
         input_data = []
         for i in columns_list:
             input_data.append(data[i])

@@ -107,7 +107,7 @@ def transformer_infer(config, dataset):
     probs = []
     source_sentences = []
     target_sentences = []
-    for batch in dataset.create_dict_iterator(output_numpy=True):
+    for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
         source_sentences.append(batch["source_eos_ids"])
         target_sentences.append(batch["target_eos_ids"])
@@ -232,7 +232,7 @@ def transformer_infer_ppl(config, dataset):
     lengths = []
     source_sentences = []
     target_sentences = []
-    for batch in dataset.create_dict_iterator(output_numpy=True):
+    for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
         source_sentences.append(batch["source_eos_ids"])
         target_sentences.append(batch["target_eos_ids"])

@@ -278,7 +278,7 @@ def do_eval_standalone():
     callback = Accuracy()
     columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
-    for data in eval_dataset.create_dict_iterator():
+    for data in eval_dataset.create_dict_iterator(num_epochs=1):
         input_data = []
         for i in columns_list:
             input_data.append(data[i])

@@ -93,7 +93,7 @@ class EvalCallBack(Callback):
         if cb_params.cur_step_num % 100 == 0:
             callback = Accuracy()
             columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
-            for data in self.dataset.create_dict_iterator():
+            for data in self.dataset.create_dict_iterator(num_epochs=1):
                 input_data = []
                 for i in columns_list:
                     input_data.append(data[i])

@@ -113,7 +113,7 @@ def run_transformer_eval():
     predictions = []
     source_sents = []
     target_sents = []
-    for batch in dataset.create_dict_iterator(output_numpy=True):
+    for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
         source_sents.append(batch["source_eos_ids"])
         target_sents.append(batch["target_eos_ids"])
         source_ids = Tensor(batch["source_eos_ids"], mstype.int32)

@@ -22,7 +22,7 @@ def create_dataset(data_file):
                               num_parallel_workers=num_readers,
                               shuffle=True)
     index = 0
-    for item in data_set.create_dict_iterator(output_numpy=True):
+    for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
         print("example {}: {}".format(index, item))
         index += 1
         if index % 1000 == 0:

@@ -28,7 +28,7 @@ args = parser.parse_args()
 data_set = ds.MindDataset(args.path)
 num_iter = 0
-for item in data_set.create_dict_iterator(output_numpy=True):
+for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
     print(item)
     num_iter += 1
 print("Total items # is {}".format(num_iter))

@@ -22,7 +22,7 @@ def create_dataset(data_file):
                               num_parallel_workers=num_readers,
                               shuffle=True)
     index = 0
-    for item in data_set.create_dict_iterator(output_numpy=True):
+    for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
         print("example {}: {}".format(index, item))
         index += 1
         if index % 1000 == 0:

@@ -22,7 +22,7 @@ def create_dataset(data_file):
                               num_parallel_workers=num_readers,
                               shuffle=True)
     index = 0
-    for item in data_set.create_dict_iterator(output_numpy=True):
+    for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
         print("example {}: {}".format(index, item))
         index += 1
         if index % 1000 == 0:

@@ -55,7 +55,7 @@ class MindData:
         self.send_epoch_end = send_epoch_end
         return self
 
-    def create_tuple_iterator(self):
+    def create_tuple_iterator(self, num_epochs=-1):
         return self.__iter__()
 
     def send(self, num_epochs=-1):

Some files were not shown because too many files have changed in this diff.
