From 0cb5c47856df398b0d4cebe7779e7c519b32ddc3 Mon Sep 17 00:00:00 2001 From: ms_yan <6576637+ms_yan@user.noreply.gitee.com> Date: Thu, 12 Nov 2020 23:44:17 +0800 Subject: [PATCH] add err modify --- mindspore/dataset/callback/ds_callback.py | 4 +- mindspore/dataset/core/validator_helpers.py | 12 +-- mindspore/dataset/engine/datasets.py | 89 ++++++++++--------- mindspore/dataset/engine/graphdata.py | 22 ++--- mindspore/dataset/engine/iterators.py | 24 +++-- mindspore/dataset/engine/samplers.py | 45 +++++----- .../dataset/engine/serializer_deserializer.py | 14 +-- mindspore/dataset/engine/validators.py | 68 +++++++------- mindspore/dataset/text/transforms.py | 16 ++-- mindspore/dataset/text/validators.py | 5 +- .../dataset/transforms/py_transforms_util.py | 2 +- mindspore/dataset/vision/py_transforms.py | 2 +- .../dataset/vision/py_transforms_util.py | 62 ++++++------- mindspore/dataset/vision/validators.py | 14 +-- tests/ut/python/dataset/test_c_compose.py | 2 +- tests/ut/python/dataset/test_compose.py | 2 +- .../python/dataset/test_datasets_generator.py | 3 +- .../dataset/test_minddataset_exception.py | 2 +- tests/ut/python/dataset/test_normalizeOp.py | 4 +- tests/ut/python/dataset/test_paddeddataset.py | 2 +- .../dataset/test_random_select_subpolicy.py | 4 +- 21 files changed, 206 insertions(+), 192 deletions(-) diff --git a/mindspore/dataset/callback/ds_callback.py b/mindspore/dataset/callback/ds_callback.py index e4a1a45412..097a4b6491 100644 --- a/mindspore/dataset/callback/ds_callback.py +++ b/mindspore/dataset/callback/ds_callback.py @@ -186,7 +186,7 @@ class WaitedDSCallback(Callback, DSCallback): success = self.epoch_event.wait(timeout=ds.config.get_callback_timeout()) self.epoch_event.clear() if not success: - raise RuntimeError(f"ds_epoch_begin timed out after {ds.config.get_callback_timeout()} second(s)") + raise RuntimeError(f"ds_epoch_begin timed out after {ds.config.get_callback_timeout()} second(s).") # by the time this thread wakes up, self.epoch_run_context is already available self.sync_epoch_begin(self.epoch_run_context, ds_run_context) @@ -212,7 +212,7 @@ class WaitedDSCallback(Callback, DSCallback): success = self.step_event.wait(timeout=ds.config.get_callback_timeout()) self.step_event.clear() if not success: - raise RuntimeError(f"ds_step_begin timed out after {ds.config.get_callback_timeout()} second(s)") + raise RuntimeError(f"ds_step_begin timed out after {ds.config.get_callback_timeout()} second(s).") # by the time this thread wakes up, self.epoch_run_context is already available self.sync_step_begin(self.step_run_context, ds_run_context) diff --git a/mindspore/dataset/core/validator_helpers.py b/mindspore/dataset/core/validator_helpers.py index 7bfdccf427..fcebdc8ef4 100644 --- a/mindspore/dataset/core/validator_helpers.py +++ b/mindspore/dataset/core/validator_helpers.py @@ -122,7 +122,7 @@ def check_pos_float64(value, arg_name=""): def check_valid_detype(type_): if type_ not in valid_detype: - raise ValueError("Unknown column type") + raise TypeError("Unknown column type.") return True @@ -146,10 +146,10 @@ def check_columns(columns, name): type_check(columns, (list, str), name) if isinstance(columns, str): if not columns: - raise ValueError("{0} should not be an empty str".format(name)) + raise ValueError("{0} should not be an empty str.".format(name)) elif isinstance(columns, list): if not columns: - raise ValueError("{0} should not be empty".format(name)) + raise ValueError("{0} should not be empty.".format(name)) for i, column_name in enumerate(columns): if 
not column_name: raise ValueError("{0}[{1}] should not be empty.".format(name, i)) @@ -250,10 +250,10 @@ def check_filename(path): forbidden_symbols = set(r'\/:*?"<>|`&\';') if set(filename) & forbidden_symbols: - raise ValueError(r"filename should not contains \/:*?\"<>|`&;\'") + raise ValueError(r"filename should not contain \/:*?\"<>|`&;\'") if filename.startswith(' ') or filename.endswith(' '): - raise ValueError("filename should not start/end with space") + raise ValueError("filename should not start/end with space.") return True @@ -374,4 +374,4 @@ def check_gnn_list_or_ndarray(param, param_name): def check_tensor_op(param, param_name): """check whether param is a tensor op or a callable Python function""" if not isinstance(param, cde.TensorOp) and not callable(param): - raise TypeError("{0} is not a c_transform op (TensorOp) nor a callable pyfunc.".format(param_name)) + raise TypeError("{0} is neither a c_transform op (TensorOp) nor a callable pyfunc.".format(param_name)) diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index a86cf602ff..5aa8fc525f 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -115,11 +115,11 @@ def get_num_rows(num_rows, num_shards): ValueError: If num_shards is invalid (<= 0). """ if num_rows < 0: - raise ValueError("num_rows is invalid (< 0)") + raise ValueError("num_rows is invalid, less than 0.") if num_shards is not None: if num_shards <= 0: - raise ValueError("num_shards is invalid (<= 0)") + raise ValueError("num_shards is invalid, less than or equal to 0.") if num_rows % num_shards == 0: num_rows = num_rows // num_shards else: @@ -867,7 +867,7 @@ class Dataset: elif isinstance(datasets, Dataset): datasets = (self, datasets) else: - raise TypeError("The zip function %s type error!" % (datasets)) + raise TypeError("Invalid datasets, expected Dataset object or tuple of Dataset, but got %s!" % (datasets)) return ZipDataset(datasets) @check_concat @@ -900,7 +900,7 @@ class Dataset: elif isinstance(datasets, list): datasets = [self] + datasets else: - raise TypeError("The concat_dataset function %s type error!" % (datasets)) + raise TypeError("Invalid datasets, expected Dataset object or list of Dataset, but got %s!" 
% (datasets)) return ConcatDataset(datasets) @check_rename @@ -1053,7 +1053,7 @@ class Dataset: raise TypeError("Please set device_type in context") if device_type not in ('Ascend', 'GPU', 'CPU'): - raise ValueError("Only support CPU, Ascend, GPU") + raise ValueError("Only CPU, Ascend or GPU device type is supported.") def get_distribution(output_dataset): dev_id = 0 @@ -1070,7 +1070,7 @@ class Dataset: return "", dev_id if not output_dataset.children: - raise RuntimeError("Unknown output_dataset: {}".format(type(output_dataset))) + raise RuntimeError("Unknown output_dataset: {}.".format(type(output_dataset))) input_dataset = output_dataset.children[0] return get_distribution(input_dataset) @@ -1082,9 +1082,9 @@ class Dataset: dist = json.load(distribution_f) device_id = dist["deviceId"] except json.decoder.JSONDecodeError: - raise RuntimeError("Json decode error when load distribution file") + raise RuntimeError("Json decode error when load distribution file.") except Exception: - raise RuntimeError("Distribution file failed to read") + raise RuntimeError("Failed to read Distribution file.") return TransferDataset(self, queue_name, device_id, device_type, send_epoch_end) @@ -1344,12 +1344,12 @@ class Dataset: if isinstance(num_batch, int) and num_batch <= 0: # throwing exception, disable all sync_wait in pipeline self.disable_sync() - raise RuntimeError("Sync_update batch size can only be positive, got : {}".format(num_batch)) + raise RuntimeError("Sync_update batch size can only be positive, got : {}.".format(num_batch)) notifiers_dict = self.get_sync_notifiers() if condition_name not in notifiers_dict: # throwing exception, disable all sync_wait in pipeline self.disable_sync() - raise RuntimeError("Condition name not found") + raise RuntimeError("Condition name not found.") if num_batch is not None: num_batch *= self.get_batch_size() notifiers_dict[condition_name](num_batch, data) @@ -1427,7 +1427,7 @@ class SourceDataset(Dataset): unmatched_patterns.append(pattern) if unmatched_patterns: - raise ValueError("The following patterns did not match any files: ", unmatched_patterns) + raise ValueError("The following patterns did not match any files: {}.".format(unmatched_patterns)) if file_list: # not empty return file_list @@ -1871,7 +1871,7 @@ class BlockReleasePair: timeout=get_callback_timeout()) # time_out will be False if time out occurs if not not_time_out: - logger.warning("Timeout happened in sync_wait, disabling lock") + logger.warning("Timeout happened in sync_wait, disabling lock.") self.disable = True self.row_count += 1 return True @@ -1916,8 +1916,8 @@ class SyncWaitDataset(DatasetOp): self._pair = BlockReleasePair(num_batch, callback) if self._condition_name in self.children[0].get_sync_notifiers(): - raise RuntimeError("Condition name is already in use") - logger.warning("Please remember to add dataset.sync_update(condition=%s), otherwise will result in hanging", + raise RuntimeError("Condition name is already in use.") + logger.warning("Please remember to add dataset.sync_update(condition=%s), otherwise hanging will result.", condition_name) def get_sync_notifiers(self): @@ -1979,7 +1979,7 @@ class ShuffleDataset(DatasetOp): input_dataset.parent.append(self) self._input_indexs = input_dataset.input_indexs if self.is_sync(): - raise RuntimeError("No shuffle after sync operators") + raise RuntimeError("No shuffle after sync operators.") def get_args(self): args = super().get_args() @@ -2042,7 +2042,7 @@ class _PythonCallable: except KeyboardInterrupt: self.pool.terminate() 
self.pool.join() - raise Exception("Multiprocess MapOp worker receives KeyboardInterrupt") + raise Exception("Multiprocess MapOp worker receives KeyboardInterrupt.") return (None,) # Invoke original Python callable in master process in case the pool is gone. return self.py_callable(*args) @@ -2113,7 +2113,8 @@ class MapDataset(DatasetOp): if self.input_columns and self.output_columns \ and len(self.input_columns) != len(self.output_columns) \ and self.column_order is None: - raise ValueError("When (len(input_columns) != len(output_columns)), column_order must be specified.") + raise ValueError("When the lengths of input_columns and output_columns are not equal," + " column_order must be specified.") input_dataset.parent.append(self) self._input_indexs = input_dataset.input_indexs @@ -2387,7 +2388,7 @@ class ZipDataset(DatasetOp): super().__init__() for dataset in datasets: if not isinstance(dataset, Dataset): - raise TypeError("The parameter %s of zip has type error!" % (dataset)) + raise TypeError("Invalid dataset, expected Dataset object, but got %s!" % type(dataset)) self.datasets = datasets for data in datasets: self.children.append(data) @@ -2439,7 +2440,7 @@ class ConcatDataset(DatasetOp): super().__init__() for dataset in datasets: if not isinstance(dataset, Dataset): - raise TypeError("The parameter %s of concat has type error!" % (dataset)) + raise TypeError("Invalid dataset, expected Dataset object, but got %s!" % type(dataset)) self.datasets = datasets self._sampler = None for data in datasets: @@ -2450,8 +2451,8 @@ class ConcatDataset(DatasetOp): child_index = 0 for item in self.children_sizes_: if item == 0: - raise ValueError("There is no samples in the %dth dataset. Please make sure there are " - "valid samples in the dataset" % child_index) + raise ValueError("There are no samples in dataset number %d. Please make sure there are " + "valid samples in the dataset." % child_index) child_index += 1 # _children_flag_and_nums: A list of pair.The first element of pair is flag that characterizes @@ -2506,7 +2507,7 @@ class ConcatDataset(DatasetOp): raise TypeError("The parameter %s of concat must be DistributedSampler!" % (sampler)) if sampler.is_shuffled(): - raise ValueError("The parameter shuffle of DistributedSampler must to be False!") + raise ValueError("The parameter shuffle of DistributedSampler must be False!") if sampler.num_shards <= 0: raise ValueError("The parameter num_shards of DistributedSampler must be positive int!") @@ -2654,10 +2655,10 @@ class TransferDataset(DatasetOp): raise RuntimeError("TransferDataset is not iterable.") def output_shapes(self): - raise RuntimeError("TransferDataset does not support output_shapes.") + raise RuntimeError("TransferDataset does not support obtaining output_shapes.") def output_types(self): - raise RuntimeError("TransferDataset does not support output_types.") + raise RuntimeError("TransferDataset does not support obtaining output_types.") def send(self, num_epochs=-1): # need to keep iterator alive so the executionTree is not destroyed @@ -2739,7 +2740,7 @@ def _select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id, n (any(arg is not None for arg in [num_shards, shard_id, shuffle, num_samples]))): raise ValueError( 'Conflicting arguments during sampler assignments. 
num_samples: {}, num_shards: {},' - ' shard_id: {}, shuffle: {})'.format(num_samples, num_shards, shard_id, shuffle)) + ' shard_id: {}, shuffle: {}.'.format(num_samples, num_shards, shard_id, shuffle)) return input_sampler if shuffle is None: if num_shards is not None: @@ -3342,13 +3343,13 @@ class SamplerFn: try: result = self.workers[i % self.num_worker].get() except queue.Empty: - raise Exception("Generator worker process timeout") + raise Exception("Generator worker process timeout.") except KeyboardInterrupt: self.eof.set() for w in self.workers: w.terminate() w.join() - raise Exception("Generator worker receives KeyboardInterrupt") + raise Exception("Generator worker receives KeyboardInterrupt.") if idx_cursor < len(indices): idx_cursor = _fill_worker_indices(self.workers, indices, idx_cursor) yield tuple([np.array(x, copy=False) for x in result]) @@ -3366,7 +3367,7 @@ def _generator_worker_loop(dataset, idx_queue, result_queue, eof): try: idx = idx_queue.get(timeout=1) except KeyboardInterrupt: - raise Exception("Generator worker receives KeyboardInterrupt") + raise Exception("Generator worker receives KeyboardInterrupt.") except queue.Empty: if eof.is_set(): return @@ -3386,7 +3387,7 @@ def _generator_worker_loop(dataset, idx_queue, result_queue, eof): try: result_queue.put(result, timeout=5) except KeyboardInterrupt: - raise Exception("Generator worker receives KeyboardInterrupt") + raise Exception("Generator worker receives KeyboardInterrupt.") except queue.Full: if eof.is_set(): return @@ -3398,7 +3399,7 @@ def _generator_worker_loop(dataset, idx_queue, result_queue, eof): class _GeneratorWorkerMt(threading.Thread): """ - Worker process for multithread Generator. + Worker process for multi-thread Generator. """ def __init__(self, dataset, eof): @@ -3454,10 +3455,10 @@ class _GeneratorWorkerMp(multiprocessing.Process): def queue_empty(self): if not self.idx_queue.empty(): - logger.error("idx_queue is not empty") + logger.error("idx_queue is not empty.") return False if not self.res_queue.empty(): - logger.error("res_queue is not empty") + logger.error("res_queue is not empty.") return False return True @@ -3755,7 +3756,8 @@ class TFRecordDataset(SourceDataset): self.num_samples = schema_obj.num_rows if not isinstance(shuffle, (bool, Shuffle)): - raise TypeError("shuffle must be of type boolean or enum 'Shuffle'.") + raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like" + " 'Shuffle.GLOBAL' or 'Shuffle.FILES'.") if not isinstance(shuffle, Shuffle): if shuffle: self.shuffle_level = Shuffle.GLOBAL @@ -4517,11 +4519,11 @@ class Schema: try: name = column.pop("name") except KeyError: - raise RuntimeError("Column's name is missing") + raise RuntimeError("Column's name is missing.") try: de_type = column.pop("type") except KeyError: - raise RuntimeError("Column' type is missing") + raise RuntimeError("Column's type is missing.") shape = column.pop("shape", None) column.pop("t_impl", None) column.pop("rank", None) @@ -4534,7 +4536,7 @@ class Schema: try: de_type = value.pop("type") except KeyError: - raise RuntimeError("Column' type is missing") + raise RuntimeError("Column's type is missing.") shape = value.pop("shape", None) value.pop("t_impl", None) value.pop("rank", None) @@ -4566,13 +4568,13 @@ class Schema: elif k == "columns": self.parse_columns(v) else: - raise RuntimeError("Unknown field %s" % k) + raise RuntimeError("Unknown field %s." 
% k) if self.columns is None: raise RuntimeError("Columns are missing.") if self.num_rows is not None: if not isinstance(self.num_rows, int) or self.num_rows <= 0: - raise ValueError("numRows must be greater than 0") + raise ValueError("numRows must be greater than 0.") def __str__(self): return self.to_json() @@ -5112,7 +5114,7 @@ class CelebADataset(MappableDataset): if int(split_line[1]) == usage_type: partition_num += 1 except FileNotFoundError: - raise RuntimeError("Partition file can not be found") + raise RuntimeError("Partition file can not be found.") if partition_num < num_rows: num_rows = partition_num @@ -5322,7 +5324,8 @@ class CLUEDataset(SourceDataset): self.cols_to_keyword = self.task_dict[task][usage] if not isinstance(shuffle, (bool, Shuffle)): - raise TypeError("shuffle must be of type boolean or enum 'Shuffle'.") + raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like" + " 'Shuffle.GLOBAL' or 'Shuffle.FILES'.") if not isinstance(shuffle, Shuffle): if shuffle: self.shuffle_level = Shuffle.GLOBAL @@ -5437,7 +5440,8 @@ class CSVDataset(SourceDataset): self.num_samples = num_samples if not isinstance(shuffle, (bool, Shuffle)): - raise TypeError("shuffle must be of type boolean or enum 'Shuffle'.") + raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like" + " 'Shuffle.GLOBAL' or 'Shuffle.FILES'.") if not isinstance(shuffle, Shuffle): if shuffle: self.shuffle_level = Shuffle.GLOBAL @@ -5545,7 +5549,8 @@ class TextFileDataset(SourceDataset): self.num_samples = num_samples if not isinstance(shuffle, (bool, Shuffle)): - raise TypeError("shuffle must be of type boolean or enum 'Shuffle'.") + raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like" + " 'Shuffle.GLOBAL' or 'Shuffle.FILES'.") if not isinstance(shuffle, Shuffle): if shuffle: self.shuffle_level = Shuffle.GLOBAL diff --git a/mindspore/dataset/engine/graphdata.py b/mindspore/dataset/engine/graphdata.py index 622160e210..1cd2bea067 100644 --- a/mindspore/dataset/engine/graphdata.py +++ b/mindspore/dataset/engine/graphdata.py @@ -89,7 +89,7 @@ class GraphData: while self._graph_data.is_stoped() is not True: time.sleep(1) except KeyboardInterrupt: - raise Exception("Graph data server receives KeyboardInterrupt") + raise Exception("Graph data server receives KeyboardInterrupt.") @check_gnn_get_all_nodes def get_all_nodes(self, node_type): @@ -112,7 +112,7 @@ class GraphData: TypeError: If `node_type` is not integer. """ if self._working_mode == 'server': - raise Exception("This method is not supported when working mode is server") + raise Exception("This method is not supported when working mode is server.") return self._graph_data.get_all_nodes(node_type).as_array() @check_gnn_get_all_edges @@ -136,7 +136,7 @@ class GraphData: TypeError: If `edge_type` is not integer. """ if self._working_mode == 'server': - raise Exception("This method is not supported when working mode is server") + raise Exception("This method is not supported when working mode is server.") return self._graph_data.get_all_edges(edge_type).as_array() @check_gnn_get_nodes_from_edges @@ -154,7 +154,7 @@ class GraphData: TypeError: If `edge_list` is not list or ndarray. 
""" if self._working_mode == 'server': - raise Exception("This method is not supported when working mode is server") + raise Exception("This method is not supported when working mode is server.") return self._graph_data.get_nodes_from_edges(edge_list).as_array() @check_gnn_get_all_neighbors @@ -181,7 +181,7 @@ class GraphData: TypeError: If `neighbor_type` is not integer. """ if self._working_mode == 'server': - raise Exception("This method is not supported when working mode is server") + raise Exception("This method is not supported when working mode is server.") return self._graph_data.get_all_neighbors(node_list, neighbor_type).as_array() @check_gnn_get_sampled_neighbors @@ -216,7 +216,7 @@ class GraphData: TypeError: If `neighbor_types` is not list or ndarray. """ if self._working_mode == 'server': - raise Exception("This method is not supported when working mode is server") + raise Exception("This method is not supported when working mode is server.") return self._graph_data.get_sampled_neighbors( node_list, neighbor_nums, neighbor_types).as_array() @@ -246,7 +246,7 @@ class GraphData: TypeError: If `neg_neighbor_type` is not integer. """ if self._working_mode == 'server': - raise Exception("This method is not supported when working mode is server") + raise Exception("This method is not supported when working mode is server.") return self._graph_data.get_neg_sampled_neighbors( node_list, neg_neighbor_num, neg_neighbor_type).as_array() @@ -274,7 +274,7 @@ class GraphData: TypeError: If `feature_types` is not list or ndarray. """ if self._working_mode == 'server': - raise Exception("This method is not supported when working mode is server") + raise Exception("This method is not supported when working mode is server.") if isinstance(node_list, list): node_list = np.array(node_list, dtype=np.int32) return [ @@ -306,7 +306,7 @@ class GraphData: TypeError: If `feature_types` is not list or ndarray. """ if self._working_mode == 'server': - raise Exception("This method is not supported when working mode is server") + raise Exception("This method is not supported when working mode is server.") if isinstance(edge_list, list): edge_list = np.array(edge_list, dtype=np.int32) return [ @@ -324,7 +324,7 @@ class GraphData: node_feature_type and edge_feature_type. """ if self._working_mode == 'server': - raise Exception("This method is not supported when working mode is server") + raise Exception("This method is not supported when working mode is server.") return self._graph_data.graph_info() @check_gnn_random_walk @@ -360,6 +360,6 @@ class GraphData: TypeError: If `meta_path` is not list or ndarray. """ if self._working_mode == 'server': - raise Exception("This method is not supported when working mode is server") + raise Exception("This method is not supported when working mode is server.") return self._graph_data.random_walk(target_nodes, meta_path, step_home_param, step_away_param, default_node).as_array() diff --git a/mindspore/dataset/engine/iterators.py b/mindspore/dataset/engine/iterators.py index 1d358a9e6f..0b6ad989b5 100644 --- a/mindspore/dataset/engine/iterators.py +++ b/mindspore/dataset/engine/iterators.py @@ -29,20 +29,25 @@ from . 
import datasets as de _ITERATOR_CLEANUP = False + def _set_iterator_cleanup(): global _ITERATOR_CLEANUP _ITERATOR_CLEANUP = True + def _unset_iterator_cleanup(): global _ITERATOR_CLEANUP _ITERATOR_CLEANUP = False + def check_iterator_cleanup(): global _ITERATOR_CLEANUP return _ITERATOR_CLEANUP + ITERATORS_LIST = list() + def _cleanup(): """Release all the Iterator.""" _set_iterator_cleanup() @@ -51,6 +56,7 @@ def _cleanup(): if itr is not None: itr.release() + def alter_tree(node): """Traversing the Python dataset tree/graph to perform some alteration to some specific nodes.""" if not node.children: @@ -73,6 +79,7 @@ def _alter_node(node): node.iterator_bootstrap() return node + class Iterator: """ General Iterator over a dataset. @@ -93,7 +100,7 @@ class Iterator: # The dataset passed into the iterator is not the root of the tree. # Trim the tree by saving the parent subtree into self.parent_subtree and - # restore it after launching our c++ pipeline. + # restore it after launching our C++ pipeline. if self.dataset.parent: logger.info("The dataset passed in is not the root of the pipeline. Ignoring parent subtree.") self.parent_subtree = self.dataset.parent @@ -101,7 +108,7 @@ class Iterator: self.dataset = alter_tree(self.dataset) if not self.__is_tree(): - raise ValueError("The data pipeline is not a tree (i.e., one node has 2 consumers)") + raise ValueError("The data pipeline is not a tree (i.e., one node has 2 consumers).") self.depipeline = DEPipeline() # for manifest temporary use @@ -116,7 +123,7 @@ class Iterator: """ Manually terminate Python iterator instead of relying on out of scope destruction. """ - logger.info("terminating Python iterator. This will also terminate c++ pipeline.") + logger.info("Terminating Python iterator. This will also terminate C++ pipeline.") if hasattr(self, 'depipeline') and self.depipeline: del self.depipeline @@ -205,7 +212,7 @@ class Iterator: elif isinstance(dataset, de.CSVDataset): op_type = OpName.CSV else: - raise ValueError("Unsupported DatasetOp") + raise ValueError("Unsupported DatasetOp.") return op_type @@ -256,9 +263,9 @@ class Iterator: def __next__(self): if not self.depipeline: - logger.warning("Iterator does not have a running c++ pipeline." + - "It can be because Iterator stop() had been called, or c++ pipeline crashed silently.") - raise RuntimeError("Iterator does not have a running c++ pipeline.") + logger.warning("Iterator does not have a running C++ pipeline." + + "It might because Iterator stop() had been called, or C++ pipeline crashed silently.") + raise RuntimeError("Iterator does not have a running C++ pipeline.") data = self.get_next() if not data: @@ -298,6 +305,7 @@ class Iterator: def __deepcopy__(self, memo): return self + class SaveOp(Iterator): """ The derived class of Iterator with dict type. @@ -375,7 +383,7 @@ class TupleIterator(Iterator): return [Tensor(t.as_array()) for t in self.depipeline.GetNextAsList()] -class DummyIterator(): +class DummyIterator: """ A DummyIterator only work when env MS_ROLE="MS_PSERVER" or MS_ROLE="MS_SCHED" """ diff --git a/mindspore/dataset/engine/samplers.py b/mindspore/dataset/engine/samplers.py index d468ef822f..58005443a4 100644 --- a/mindspore/dataset/engine/samplers.py +++ b/mindspore/dataset/engine/samplers.py @@ -24,6 +24,7 @@ import numpy as np import mindspore._c_dataengine as cde import mindspore.dataset as ds + class Sampler: """ Base class for user defined sampler. 
@@ -245,22 +246,22 @@ class DistributedSampler(BuiltinSampler): def __init__(self, num_shards, shard_id, shuffle=True, num_samples=None, offset=-1): if num_shards <= 0: - raise ValueError("num_shards should be a positive integer value, but got num_shards={}".format(num_shards)) + raise ValueError("num_shards should be a positive integer value, but got num_shards: {}.".format(num_shards)) if shard_id < 0 or shard_id >= num_shards: - raise ValueError("shard_id is invalid, shard_id={}".format(shard_id)) + raise ValueError("shard_id should be in range [0, {}), but got shard_id: {}.".format(num_shards, shard_id)) if not isinstance(shuffle, bool): - raise ValueError("shuffle should be a boolean value, but got shuffle={}".format(shuffle)) + raise ValueError("shuffle should be a boolean value, but got shuffle: {}.".format(shuffle)) if num_samples is not None: if num_samples <= 0: raise ValueError("num_samples should be a positive integer " - "value, but got num_samples={}".format(num_samples)) + "value, but got num_samples: {}.".format(num_samples)) if offset > num_shards: - raise ValueError("offset should be no more than num_shards={}, " - "but got offset={}".format(num_shards, offset)) + raise ValueError("offset should be no more than num_shards: {}, " + "but got offset: {}.".format(num_shards, offset)) self.num_shards = num_shards self.shard_id = shard_id @@ -332,18 +333,18 @@ class PKSampler(BuiltinSampler): def __init__(self, num_val, num_class=None, shuffle=False, class_column='label', num_samples=None): if num_val <= 0: - raise ValueError("num_val should be a positive integer value, but got num_val={}".format(num_val)) + raise ValueError("num_val should be a positive integer value, but got num_val: {}.".format(num_val)) if num_class is not None: - raise NotImplementedError("Not support specify num_class") + raise NotImplementedError("Specifying num_class for PKSampler is not supported.") if not isinstance(shuffle, bool): - raise ValueError("shuffle should be a boolean value, but got shuffle={}".format(shuffle)) + raise ValueError("shuffle should be a boolean value, but got shuffle: {}.".format(shuffle)) if num_samples is not None: if num_samples <= 0: raise ValueError("num_samples should be a positive integer " - "value, but got num_samples={}".format(num_samples)) + "value, but got num_samples: {}.".format(num_samples)) self.num_val = num_val self.shuffle = shuffle @@ -372,7 +373,7 @@ class PKSampler(BuiltinSampler): def create_for_minddataset(self): if not self.class_column or not isinstance(self.class_column, str): raise ValueError("class_column should be a not empty string value, \ - but got class_column={}".format(class_column)) + but got class_column: {}.".format(self.class_column)) num_samples = self.num_samples if self.num_samples is not None else 0 c_sampler = cde.MindrecordPkSampler(self.num_val, self.class_column, self.shuffle, num_samples) c_child_sampler = self.create_child_for_minddataset() @@ -404,12 +405,12 @@ class RandomSampler(BuiltinSampler): def __init__(self, replacement=False, num_samples=None): if not isinstance(replacement, bool): - raise ValueError("replacement should be a boolean value, but got replacement={}".format(replacement)) + raise ValueError("replacement should be a boolean value, but got replacement: {}.".format(replacement)) if num_samples is not None: if num_samples <= 0: raise ValueError("num_samples should be a positive integer " - "value, but got num_samples={}".format(num_samples)) + "value, but got num_samples: {}.".format(num_samples)) self.deterministic = 
False self.replacement = replacement @@ -462,12 +463,12 @@ class SequentialSampler(BuiltinSampler): if num_samples is not None: if num_samples <= 0: raise ValueError("num_samples should be a positive integer " - "value, but got num_samples={}".format(num_samples)) + "value, but got num_samples: {}.".format(num_samples)) if start_index is not None: if start_index < 0: raise ValueError("start_index should be a positive integer " - "value or 0, but got start_index={}".format(start_index)) + "value or 0, but got start_index: {}.".format(start_index)) self.start_index = start_index super().__init__(num_samples) @@ -517,7 +518,7 @@ class SubsetRandomSampler(BuiltinSampler): >>> indices = [0, 1, 2, 3, 7, 88, 119] >>> >>> # creates a SubsetRandomSampler, will sample from the provided indices - >>> sampler = ds.SubsetRandomSampler() + >>> sampler = ds.SubsetRandomSampler(indices) >>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler) """ @@ -525,7 +526,7 @@ class SubsetRandomSampler(BuiltinSampler): if num_samples is not None: if num_samples <= 0: raise ValueError("num_samples should be a positive integer " - "value, but got num_samples={}".format(num_samples)) + "value, but got num_samples: {}.".format(num_samples)) if not isinstance(indices, list): indices = [indices] @@ -595,24 +596,24 @@ class WeightedRandomSampler(BuiltinSampler): for ind, w in enumerate(weights): if not isinstance(w, numbers.Number): raise TypeError("type of weights element should be number, " - "but got w[{}]={}, type={}".format(ind, w, type(w))) + "but got w[{}]: {}, type: {}.".format(ind, w, type(w))) if weights == []: raise ValueError("weights size should not be 0") if list(filter(lambda x: x < 0, weights)) != []: - raise ValueError("weights should not contain negative numbers") + raise ValueError("weights should not contain negative numbers.") if list(filter(lambda x: x == 0, weights)) == weights: - raise ValueError("elements of weights should not be all zero") + raise ValueError("elements of weights should not be all zeros.") if num_samples is not None: if num_samples <= 0: raise ValueError("num_samples should be a positive integer " - "value, but got num_samples={}".format(num_samples)) + "value, but got num_samples: {}.".format(num_samples)) if not isinstance(replacement, bool): - raise ValueError("replacement should be a boolean value, but got replacement={}".format(replacement)) + raise ValueError("replacement should be a boolean value, but got replacement: {}.".format(replacement)) self.weights = weights self.replacement = replacement diff --git a/mindspore/dataset/engine/serializer_deserializer.py b/mindspore/dataset/engine/serializer_deserializer.py index 3b3c92c50f..a77fe712e2 100644 --- a/mindspore/dataset/engine/serializer_deserializer.py +++ b/mindspore/dataset/engine/serializer_deserializer.py @@ -348,15 +348,15 @@ def create_node(node): elif dataset_op == 'CacheDataset': # Member function cache() is not defined in class Dataset yet. - raise RuntimeError(dataset_op + " is not yet supported") + raise RuntimeError(dataset_op + " is not yet supported.") elif dataset_op == 'FilterDataset': # Member function filter() is not defined in class Dataset yet. - raise RuntimeError(dataset_op + " is not yet supported") + raise RuntimeError(dataset_op + " is not yet supported.") elif dataset_op == 'TakeDataset': # Member function take() is not defined in class Dataset yet. 
- raise RuntimeError(dataset_op + " is not yet supported") + raise RuntimeError(dataset_op + " is not yet supported.") elif dataset_op == 'ZipDataset': # Create ZipDataset instance, giving dummy input dataset that will be overrided in the caller. @@ -376,7 +376,7 @@ def create_node(node): pyobj = de.Dataset().to_device() else: - raise RuntimeError(dataset_op + " is not yet supported by ds.engine.deserialize()") + raise RuntimeError(dataset_op + " is not yet supported by ds.engine.deserialize().") return pyobj @@ -401,7 +401,7 @@ def construct_sampler(in_sampler): elif sampler_name == 'WeightedRandomSampler': sampler = sampler_class(in_sampler['weights'], in_sampler['num_samples'], in_sampler.get('replacement')) else: - raise ValueError("Sampler type is unknown: " + sampler_name) + raise ValueError("Sampler type is unknown: {}.".format(sampler_name)) return sampler @@ -461,7 +461,7 @@ def construct_tensor_ops(operations): result.append(op_class()) elif op_name == 'CHW2HWC': - raise ValueError("Tensor op is not supported: " + op_name) + raise ValueError("Tensor op is not supported: {}.".format(op_name)) elif op_name == 'OneHot': result.append(op_class(op['num_classes'])) @@ -474,6 +474,6 @@ def construct_tensor_ops(operations): result.append(op_class(op['padding'], op['fill_value'], Border(op['padding_mode']))) else: - raise ValueError("Tensor op name is unknown: " + op_name) + raise ValueError("Tensor op name is unknown: {}.".format(op_name)) return result diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index b026967ec1..d106d8be47 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -134,7 +134,7 @@ def check_tfrecorddataset(method): dataset_files = param_dict.get('dataset_files') if not isinstance(dataset_files, (str, list)): - raise TypeError("dataset_files should be of type str or a list of strings.") + raise TypeError("dataset_files should be type str or a list of strings.") validate_dataset_param_value(nreq_param_int, param_dict, int) validate_dataset_param_value(nreq_param_list, param_dict, list) @@ -173,11 +173,11 @@ def check_vocdataset(method): if task == "Segmentation": imagesets_file = os.path.join(dataset_dir, "ImageSets", "Segmentation", usage + ".txt") if param_dict.get('class_indexing') is not None: - raise ValueError("class_indexing is invalid in Segmentation task") + raise ValueError("class_indexing is not supported in Segmentation task.") elif task == "Detection": imagesets_file = os.path.join(dataset_dir, "ImageSets", "Main", usage + ".txt") else: - raise ValueError("Invalid task : " + task) + raise ValueError("Invalid task : " + task + ".") check_file(imagesets_file) @@ -214,7 +214,7 @@ def check_cocodataset(method): type_check(task, (str,), "task") if task not in {'Detection', 'Stuff', 'Panoptic', 'Keypoint'}: - raise ValueError("Invalid task type") + raise ValueError("Invalid task type: " + task + ".") validate_dataset_param_value(nreq_param_int, param_dict, int) @@ -222,7 +222,7 @@ def check_cocodataset(method): sampler = param_dict.get('sampler') if sampler is not None and isinstance(sampler, samplers.PKSampler): - raise ValueError("CocoDataset doesn't support PKSampler") + raise ValueError("CocoDataset doesn't support PKSampler.") check_sampler_shuffle_shard_options(param_dict) cache = param_dict.get('cache') @@ -256,13 +256,13 @@ def check_celebadataset(method): usage = param_dict.get('usage') if usage is not None and usage not in ('all', 'train', 'valid', 'test'): - raise 
ValueError("usage should be one of 'all', 'train', 'valid' or 'test'.") + raise ValueError("usage should be 'all', 'train', 'valid' or 'test'.") check_sampler_shuffle_shard_options(param_dict) sampler = param_dict.get('sampler') if sampler is not None and isinstance(sampler, samplers.PKSampler): - raise ValueError("CelebADataset does not support PKSampler.") + raise ValueError("CelebADataset doesn't support PKSampler.") cache = param_dict.get('cache') check_cache_option(cache) @@ -350,14 +350,14 @@ def check_generatordataset(method): try: iter(source) except TypeError: - raise TypeError("source should be callable, iterable or random accessible") + raise TypeError("source should be callable, iterable or random accessible.") column_names = param_dict.get('column_names') if column_names is not None: check_columns(column_names, "column_names") schema = param_dict.get('schema') if column_names is None and schema is None: - raise ValueError("Neither columns_names not schema are provided.") + raise ValueError("Neither columns_names nor schema are provided.") if schema is not None: if not isinstance(schema, datasets.Schema) and not isinstance(schema, str): @@ -375,7 +375,7 @@ def check_generatordataset(method): shard_id = param_dict.get("shard_id") if (num_shards is None) != (shard_id is None): # These two parameters appear together. - raise ValueError("num_shards and shard_id need to be passed in together") + raise ValueError("num_shards and shard_id need to be passed in together.") if num_shards is not None: check_pos_int32(num_shards, "num_shards") if shard_id >= num_shards: @@ -384,19 +384,19 @@ def check_generatordataset(method): sampler = param_dict.get("sampler") if sampler is not None: if isinstance(sampler, samplers.PKSampler): - raise ValueError("PKSampler is not supported by GeneratorDataset") + raise ValueError("GeneratorDataset doesn't support PKSampler.") if not isinstance(sampler, (samplers.SequentialSampler, samplers.DistributedSampler, samplers.RandomSampler, samplers.SubsetRandomSampler, samplers.WeightedRandomSampler, samplers.Sampler)): try: iter(sampler) except TypeError: - raise TypeError("sampler should be either iterable or from mindspore.dataset.samplers") + raise TypeError("sampler should be either iterable or from mindspore.dataset.samplers.") if sampler is not None and not hasattr(source, "__getitem__"): - raise ValueError("sampler is not supported if source does not have attribute '__getitem__'") + raise ValueError("sampler is not supported if source does not have attribute '__getitem__'.") if num_shards is not None and not hasattr(source, "__getitem__"): - raise ValueError("num_shards is not supported if source does not have attribute '__getitem__'") + raise ValueError("num_shards is not supported if source does not have attribute '__getitem__'.") return method(self, *args, **kwargs) @@ -433,7 +433,7 @@ def check_pad_info(key, val): type_check(key, (str,), "key in pad_info") if val is not None: - assert len(val) == 2, "value of pad_info should be a tuple of size 2" + assert len(val) == 2, "value of pad_info should be a tuple of size 2." 
type_check(val, (tuple,), "value in pad_info") if val[0] is not None: @@ -521,14 +521,14 @@ def check_batch(method): if callable(batch_size): sig = ins.signature(batch_size) if len(sig.parameters) != 1: - raise ValueError("batch_size callable should take one parameter (BatchInfo).") + raise ValueError("callable batch_size should take one parameter (BatchInfo).") if num_parallel_workers is not None: check_num_parallel_workers(num_parallel_workers) type_check(drop_remainder, (bool,), "drop_remainder") if (pad_info is not None) and (per_batch_map is not None): - raise ValueError("pad_info and per_batch_map can't both be set") + raise ValueError("pad_info and per_batch_map can't both be set.") if pad_info is not None: type_check(param_dict["pad_info"], (dict,), "pad_info") @@ -542,7 +542,7 @@ def check_batch(method): if input_columns is not None: check_columns(input_columns, "input_columns") if len(input_columns) != (len(ins.signature(per_batch_map).parameters) - 1): - raise ValueError("the signature of per_batch_map should match with input columns") + raise ValueError("The signature of per_batch_map should match with input columns.") if output_columns is not None: check_columns(output_columns, "output_columns") @@ -816,13 +816,13 @@ def check_add_column(method): type_check(name, (str,), "name") if not name: - raise TypeError("Expected non-empty string.") + raise TypeError("Expected non-empty string for column name.") if de_type is not None: if not isinstance(de_type, typing.Type) and not check_valid_detype(de_type): - raise TypeError("Unknown column type.") + raise TypeError("Unknown column type: {}.".format(de_type)) else: - raise TypeError("Expected non-empty string.") + raise TypeError("Expected non-empty string for de_type.") if shape is not None: type_check(shape, (list,), "shape") @@ -848,12 +848,12 @@ def check_cluedataset(method): # check task task_param = param_dict.get('task') if task_param not in ['AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC', 'CSL']: - raise ValueError("task should be AFQMC, TNEWS, IFLYTEK, CMNLI, WSC or CSL") + raise ValueError("task should be 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' or 'CSL'.") # check usage usage_param = param_dict.get('usage') if usage_param not in ['train', 'test', 'eval']: - raise ValueError("usage should be train, test or eval") + raise ValueError("usage should be 'train', 'test' or 'eval'.") validate_dataset_param_value(nreq_param_int, param_dict, int) check_sampler_shuffle_shard_options(param_dict) @@ -883,7 +883,7 @@ def check_csvdataset(method): field_delim = param_dict.get('field_delim') type_check(field_delim, (str,), 'field delim') if field_delim in ['"', '\r', '\n'] or len(field_delim) > 1: - raise ValueError("field_delim is not legal.") + raise ValueError("field_delim is invalid.") # check column_defaults column_defaults = param_dict.get('column_defaults') @@ -892,7 +892,7 @@ def check_csvdataset(method): raise TypeError("column_defaults should be type of list.") for item in column_defaults: if not isinstance(item, (str, int, float)): - raise TypeError("column type is not legal.") + raise TypeError("column type in column_defaults is invalid.") # check column_names: must be list of string. 
column_names = param_dict.get("column_names") @@ -997,7 +997,7 @@ def check_gnn_graphdata(method): raise ValueError("The hostname is illegal") type_check(working_mode, (str,), "working_mode") if working_mode not in {'local', 'client', 'server'}: - raise ValueError("Invalid working mode, please enter 'local', 'client' or 'server'") + raise ValueError("Invalid working mode, please enter 'local', 'client' or 'server'.") type_check(port, (int,), "port") check_value(port, (1024, 65535), "port") type_check(num_client, (int,), "num_client") @@ -1073,17 +1073,17 @@ def check_gnn_get_sampled_neighbors(method): check_gnn_list_or_ndarray(neighbor_nums, 'neighbor_nums') if not neighbor_nums or len(neighbor_nums) > 6: - raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}".format( + raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}.".format( 'neighbor_nums', len(neighbor_nums))) check_gnn_list_or_ndarray(neighbor_types, 'neighbor_types') if not neighbor_types or len(neighbor_types) > 6: - raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}".format( + raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}.".format( 'neighbor_types', len(neighbor_types))) if len(neighbor_nums) != len(neighbor_types): raise ValueError( - "The number of members of neighbor_nums and neighbor_types is inconsistent") + "The number of members of neighbor_nums and neighbor_types is inconsistent.") return method(self, *args, **kwargs) @@ -1139,17 +1139,17 @@ def check_aligned_list(param, param_name, member_type): check_aligned_list(member, param_name, member_type) if member_have_list not in (None, True): - raise TypeError("The type of each member of the parameter {0} is inconsistent".format( + raise TypeError("The type of each member of the parameter {0} is inconsistent.".format( param_name)) if list_len is not None and len(member) != list_len: - raise TypeError("The size of each member of parameter {0} is inconsistent".format( + raise TypeError("The size of each member of parameter {0} is inconsistent.".format( param_name)) member_have_list = True list_len = len(member) else: type_check(member, (member_type,), param_name) if member_have_list not in (None, False): - raise TypeError("The type of each member of the parameter {0} is inconsistent".format( + raise TypeError("The type of each member of the parameter {0} is inconsistent.".format( param_name)) member_have_list = False @@ -1248,7 +1248,7 @@ def check_paddeddataset(method): padded_samples = param_dict.get("padded_samples") if not padded_samples: - raise ValueError("Argument padded_samples cannot be empty") + raise ValueError("padded_samples cannot be empty.") type_check(padded_samples, (list,), "padded_samples") type_check(padded_samples[0], (dict,), "padded_element") return method(self, *args, **kwargs) @@ -1261,6 +1261,6 @@ def check_cache_option(cache): if cache is not None: if os.getenv('MS_ENABLE_CACHE') != 'TRUE': # temporary disable cache feature in the current release - raise ValueError("Caching is disabled in the current release") + raise ValueError("Caching is disabled in the current release.") from . 
import cache_client type_check(cache, (cache_client.DatasetCache,), "cache") diff --git a/mindspore/dataset/text/transforms.py b/mindspore/dataset/text/transforms.py index 56ceb736b0..5d3e160a6c 100644 --- a/mindspore/dataset/text/transforms.py +++ b/mindspore/dataset/text/transforms.py @@ -257,7 +257,7 @@ class JiebaTokenizer(cde.JiebaTokenizerOp): for k, v in user_dict.items(): self.add_word(k, v) else: - raise ValueError("the type of user_dict must str or dict") + raise TypeError("The type of user_dict must be str or dict.") def __add_dict_py_file(self, file_path): """Add user defined word by file""" @@ -273,7 +273,7 @@ class JiebaTokenizer(cde.JiebaTokenizerOp): """parser user defined word by file""" if not os.path.exists(file_path): raise ValueError( - "user dict file {} is not exist".format(file_path)) + "user dict file {} does not exist.".format(file_path)) real_file_path = os.path.realpath(file_path) file_dict = open(real_file_path) data_re = re.compile('^(.+?)( [0-9]+)?$', re.U) @@ -285,7 +285,7 @@ class JiebaTokenizer(cde.JiebaTokenizerOp): words = data_re.match(data).groups() if len(words) != 2: raise ValueError( - "user dict file {} format error".format(real_file_path)) + "user dict file {} has invalid format.".format(real_file_path)) words_list.append(words) file_dict.close() return words_list @@ -295,14 +295,14 @@ class JiebaTokenizer(cde.JiebaTokenizerOp): try: data = data.decode('utf-8') except UnicodeDecodeError: - raise ValueError("user dict file must utf8") + raise ValueError("user dict file must be in utf8 format.") return data.lstrip('\ufeff') def __check_path__(self, model_path): """check model path""" if not os.path.exists(model_path): raise ValueError( - " jieba mode file {} is not exist".format(model_path)) + "jieba model file {} does not exist.".format(model_path)) class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp): @@ -528,7 +528,7 @@ if platform.system().lower() != 'windows': def __init__(self, normalize_form=NormalizeForm.NFKC): if not isinstance(normalize_form, NormalizeForm): - raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.") + raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.") self.normalize_form = DE_C_INTER_NORMALIZE_FORM[normalize_form] super().__init__(self.normalize_form) @@ -650,7 +650,7 @@ if platform.system().lower() != 'windows': def __init__(self, lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, preserve_unused_token=True, with_offsets=False): if not isinstance(normalization_form, NormalizeForm): - raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.") + raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.") self.lower_case = lower_case self.keep_whitespace = keep_whitespace @@ -710,7 +710,7 @@ if platform.system().lower() != 'windows': lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, preserve_unused_token=True, with_offsets=False): if not isinstance(normalization_form, NormalizeForm): - raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.") + raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.") self.vocab = vocab self.suffix_indicator = suffix_indicator diff --git a/mindspore/dataset/text/validators.py b/mindspore/dataset/text/validators.py index 186d187a05..1690c8fe19 100644 --- a/mindspore/dataset/text/validators.py +++ b/mindspore/dataset/text/validators.py @@ -417,7 +417,7 @@ 
def check_python_tokenizer(method): [tokenizer], _ = parse_user_args(method, *args, **kwargs) if not callable(tokenizer): - raise TypeError("tokenizer is not a callable Python function") + raise TypeError("tokenizer is not a callable Python function.") return method(self, *args, **kwargs) @@ -437,8 +437,7 @@ def check_from_dataset_sentencepiece(method): if vocab_size is not None: check_uint32(vocab_size, "vocab_size") else: - raise TypeError("vocab_size must be provided") - + raise TypeError("vocab_size must be provided.") if character_coverage is not None: type_check(character_coverage, (float,), "character_coverage") diff --git a/mindspore/dataset/transforms/py_transforms_util.py b/mindspore/dataset/transforms/py_transforms_util.py index bc331e0467..ed5eba5eb1 100644 --- a/mindspore/dataset/transforms/py_transforms_util.py +++ b/mindspore/dataset/transforms/py_transforms_util.py @@ -49,7 +49,7 @@ def compose(transforms, *args): if all_numpy(args): return args - raise TypeError('args should be Numpy ndarray. Got {}. Append ToTensor() to transforms'.format(type(args))) + raise TypeError('args should be Numpy ndarray. Got {}. Append ToTensor() to transforms.'.format(type(args))) raise TypeError('args should be Numpy ndarray. Got {}.'.format(type(args))) diff --git a/mindspore/dataset/vision/py_transforms.py b/mindspore/dataset/vision/py_transforms.py index b1630d4088..c476301c35 100644 --- a/mindspore/dataset/vision/py_transforms.py +++ b/mindspore/dataset/vision/py_transforms.py @@ -971,7 +971,7 @@ class Cutout: np_img (numpy.ndarray), NumPy image array with square patches cut out. """ if not isinstance(np_img, np.ndarray): - raise TypeError('img should be NumPy array. Got {}'.format(type(np_img))) + raise TypeError("img should be NumPy array. Got {}.".format(type(np_img))) _, image_h, image_w = np_img.shape scale = (self.length * self.length) / (image_h * image_w) bounded = False diff --git a/mindspore/dataset/vision/py_transforms_util.py b/mindspore/dataset/vision/py_transforms_util.py index 5de840a30f..a4fd693fef 100644 --- a/mindspore/dataset/vision/py_transforms_util.py +++ b/mindspore/dataset/vision/py_transforms_util.py @@ -26,7 +26,7 @@ from PIL import Image, ImageOps, ImageEnhance, __version__ from .utils import Inter from ..core.py_util_helpers import is_numpy -augment_error_message = 'img should be PIL image. Got {}. Use Decode() for encoded data or ToPIL() for decoded data.' +augment_error_message = "img should be PIL image. Got {}. Use Decode() for encoded data or ToPIL() for decoded data." def is_pil(img): @@ -55,19 +55,19 @@ def normalize(img, mean, std): img (numpy.ndarray), Normalized image. """ if not is_numpy(img): - raise TypeError('img should be NumPy image. Got {}'.format(type(img))) + raise TypeError("img should be NumPy image. Got {}.".format(type(img))) num_channels = img.shape[0] # shape is (C, H, W) if len(mean) != len(std): - raise ValueError("Length of mean and std must be equal") + raise ValueError("Length of mean and std must be equal.") # if length equal to 1, adjust the mean and std arrays to have the correct # number of channels (replicate the values) if len(mean) == 1: mean = [mean[0]] * num_channels std = [std[0]] * num_channels elif len(mean) != num_channels: - raise ValueError("Length of mean and std must both be 1 or equal to the number of channels({0})" + raise ValueError("Length of mean and std must both be 1 or equal to the number of channels({0})." 
.format(num_channels)) mean = np.array(mean, dtype=img.dtype) @@ -108,7 +108,7 @@ def hwc_to_chw(img): """ if is_numpy(img): return img.transpose(2, 0, 1).copy() - raise TypeError('img should be NumPy array. Got {}'.format(type(img))) + raise TypeError('img should be NumPy array. Got {}.'.format(type(img))) def to_tensor(img, output_type): @@ -123,11 +123,11 @@ def to_tensor(img, output_type): img (numpy.ndarray), Converted image. """ if not (is_pil(img) or is_numpy(img)): - raise TypeError('img should be PIL image or NumPy array. Got {}'.format(type(img))) + raise TypeError("img should be PIL image or NumPy array. Got {}.".format(type(img))) img = np.asarray(img) if img.ndim not in (2, 3): - raise ValueError('img dimension should be 2 or 3. Got {}'.format(img.ndim)) + raise ValueError("img dimension should be 2 or 3. Got {}.".format(img.ndim)) if img.ndim == 2: img = img[:, :, None] @@ -265,7 +265,7 @@ def resize(img, size, interpolation=Inter.BILINEAR): raise TypeError(augment_error_message.format(type(img))) if not (isinstance(size, int) or (isinstance(size, (list, tuple)) and len(size) == 2)): raise TypeError('Size should be a single number or a list/tuple (h, w) of length 2.' - 'Got {}'.format(size)) + 'Got {}.'.format(size)) if isinstance(size, int): img_width, img_height = img.size @@ -424,7 +424,7 @@ def random_crop(img, size, padding, pad_if_needed, fill_value, padding_mode): img_width, img_height = img.size height, width = size if height > img_height or width > img_width: - raise ValueError("Crop size {} is larger than input image size {}".format(size, (img_height, img_width))) + raise ValueError("Crop size {} is larger than input image size {}.".format(size, (img_height, img_width))) if width == img_width and height == img_height: return 0, 0, img_height, img_width @@ -558,7 +558,7 @@ def to_type(img, output_type): img (numpy.ndarray), Converted image. """ if not is_numpy(img): - raise TypeError('img should be NumPy image. Got {}'.format(type(img))) + raise TypeError("img should be NumPy image. 
Got {}.".format(type(img))) return img.astype(output_type) @@ -632,7 +632,7 @@ def random_color_adjust(img, brightness, contrast, saturation, hue): elif isinstance(value, (list, tuple)) and len(value) == 2: if not bound[0] <= value[0] <= value[1] <= bound[1]: raise ValueError("Please check your value range of {} is valid and " - "within the bound {}".format(input_name, bound)) + "within the bound {}.".format(input_name, bound)) else: raise TypeError("Input of {} should be either a single value, or a list/tuple of " "length 2.".format(input_name)) @@ -695,7 +695,7 @@ def random_rotation(img, degrees, resample, expand, center, fill_value): if len(degrees) != 2: raise ValueError("If degrees is a sequence, the length must be 2.") else: - raise TypeError("Degrees must be a single non-negative number or a sequence") + raise TypeError("Degrees must be a single non-negative number or a sequence.") angle = random.uniform(degrees[0], degrees[1]) return rotate(img, angle, resample, expand, center, fill_value) @@ -729,7 +729,7 @@ def five_crop(img, size): img_width, img_height = img.size crop_height, crop_width = size if crop_height > img_height or crop_width > img_width: - raise ValueError("Crop size {} is larger than input image size {}".format(size, (img_height, img_width))) + raise ValueError("Crop size {} is larger than input image size {}.".format(size, (img_height, img_width))) center = center_crop(img, (crop_height, crop_width)) top_left = img.crop((0, 0, crop_width, crop_height)) top_right = img.crop((img_width - crop_width, 0, img_width, crop_height)) @@ -802,7 +802,7 @@ def grayscale(img, num_output_channels): np_img = np.dstack([np_gray, np_gray, np_gray]) img = Image.fromarray(np_img, 'RGB') else: - raise ValueError('num_output_channels should be either 1 or 3. Got {}'.format(num_output_channels)) + raise ValueError('num_output_channels should be either 1 or 3. Got {}.'.format(num_output_channels)) return img @@ -859,7 +859,7 @@ def pad(img, padding, fill_value, padding_mode): raise TypeError("fill_value can be any of: an integer, a string or a tuple.") if padding_mode not in ['constant', 'edge', 'reflect', 'symmetric']: - raise ValueError("Padding mode can be any of ['constant', 'edge', 'reflect', 'symmetric'].") + raise ValueError("Padding mode should be 'constant', 'edge', 'reflect', or 'symmetric'.") if padding_mode == 'constant': if img.mode == 'P': @@ -946,7 +946,7 @@ def get_erase_params(np_img, scale, ratio, value, bounded, max_attempts): """Helper function to get parameters for RandomErasing/ Cutout. """ if not is_numpy(np_img): - raise TypeError('img should be NumPy array. Got {}'.format(type(np_img))) + raise TypeError('img should be NumPy array. Got {}.'.format(type(np_img))) image_c, image_h, image_w = np_img.shape area = image_h * image_w @@ -1009,7 +1009,7 @@ def erase(np_img, i, j, height, width, erase_value, inplace=False): np_img (numpy.ndarray), Erased NumPy image array. """ if not is_numpy(np_img): - raise TypeError('img should be NumPy array. Got {}'.format(type(np_img))) + raise TypeError('img should be NumPy array. Got {}.'.format(type(np_img))) if not inplace: np_img = np_img.copy() @@ -1111,7 +1111,7 @@ def random_affine(img, angle, translations, scale, shear, resample, fill_value=0 else: raise ValueError( "Shear should be a single value or a tuple/list containing " + - "two values. Got {}".format(shear)) + "two values. 
Got {}.".format(shear)) scale = 1.0 / scale @@ -1239,13 +1239,13 @@ def rgb_to_hsvs(np_rgb_imgs, is_hwc): np_hsv_imgs (numpy.ndarray), NumPy HSV images with same type of np_rgb_imgs. """ if not is_numpy(np_rgb_imgs): - raise TypeError('img should be NumPy image. Got {}'.format(type(np_rgb_imgs))) + raise TypeError("img should be NumPy image. Got {}".format(type(np_rgb_imgs))) shape_size = len(np_rgb_imgs.shape) if not shape_size in (3, 4): - raise TypeError('img shape should be (H, W, C)/(N, H, W, C)/(C ,H, W)/(N, C, H, W). \ - Got {}'.format(np_rgb_imgs.shape)) + raise TypeError("img shape should be (H, W, C)/(N, H, W, C)/(C ,H, W)/(N, C, H, W). \ + Got {}.".format(np_rgb_imgs.shape)) if shape_size == 3: batch_size = 0 @@ -1261,7 +1261,7 @@ def rgb_to_hsvs(np_rgb_imgs, is_hwc): num_channels = np_rgb_imgs.shape[1] if num_channels != 3: - raise TypeError('img should be 3 channels RGB img. Got {} channels'.format(num_channels)) + raise TypeError("img should be 3 channels RGB img. Got {} channels.".format(num_channels)) if batch_size == 0: return rgb_to_hsv(np_rgb_imgs, is_hwc) return np.array([rgb_to_hsv(img, is_hwc) for img in np_rgb_imgs]) @@ -1307,13 +1307,13 @@ def hsv_to_rgbs(np_hsv_imgs, is_hwc): np_rgb_imgs (numpy.ndarray), NumPy RGB images with same type of np_hsv_imgs. """ if not is_numpy(np_hsv_imgs): - raise TypeError('img should be NumPy image. Got {}'.format(type(np_hsv_imgs))) + raise TypeError("img should be NumPy image. Got {}.".format(type(np_hsv_imgs))) shape_size = len(np_hsv_imgs.shape) if not shape_size in (3, 4): - raise TypeError('img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). \ - Got {}'.format(np_hsv_imgs.shape)) + raise TypeError("img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). \ + Got {}.".format(np_hsv_imgs.shape)) if shape_size == 3: batch_size = 0 @@ -1329,7 +1329,7 @@ def hsv_to_rgbs(np_hsv_imgs, is_hwc): num_channels = np_hsv_imgs.shape[1] if num_channels != 3: - raise TypeError('img should be 3 channels RGB img. Got {} channels'.format(num_channels)) + raise TypeError("img should be 3 channels RGB img. Got {} channels.".format(num_channels)) if batch_size == 0: return hsv_to_rgb(np_hsv_imgs, is_hwc) return np.array([hsv_to_rgb(img, is_hwc) for img in np_hsv_imgs]) @@ -1349,7 +1349,7 @@ def random_color(img, degrees): """ if not is_pil(img): - raise TypeError('img should be PIL image. Got {}'.format(type(img))) + raise TypeError("img should be PIL image. Got {}.".format(type(img))) v = (degrees[1] - degrees[0]) * random.random() + degrees[0] return ImageEnhance.Color(img).enhance(v) @@ -1369,7 +1369,7 @@ def random_sharpness(img, degrees): """ if not is_pil(img): - raise TypeError('img should be PIL image. Got {}'.format(type(img))) + raise TypeError("img should be PIL image. Got {}.".format(type(img))) v = (degrees[1] - degrees[0]) * random.random() + degrees[0] return ImageEnhance.Sharpness(img).enhance(v) @@ -1390,7 +1390,7 @@ def auto_contrast(img, cutoff, ignore): """ if not is_pil(img): - raise TypeError('img should be PIL image. Got {}'.format(type(img))) + raise TypeError("img should be PIL image. Got {}.".format(type(img))) return ImageOps.autocontrast(img, cutoff, ignore) @@ -1408,7 +1408,7 @@ def invert_color(img): """ if not is_pil(img): - raise TypeError('img should be PIL image. Got {}'.format(type(img))) + raise TypeError("img should be PIL image. 
Got {}.".format(type(img))) return ImageOps.invert(img) @@ -1426,7 +1426,7 @@ def equalize(img): """ if not is_pil(img): - raise TypeError('img should be PIL image. Got {}'.format(type(img))) + raise TypeError("img should be PIL image. Got {}.".format(type(img))) return ImageOps.equalize(img) diff --git a/mindspore/dataset/vision/validators.py b/mindspore/dataset/vision/validators.py index 8060c62145..e65df54ad0 100644 --- a/mindspore/dataset/vision/validators.py +++ b/mindspore/dataset/vision/validators.py @@ -79,7 +79,7 @@ def check_mix_up_batch_c(method): def check_normalize_c_param(mean, std): if len(mean) != len(std): - raise ValueError("Length of mean and std must be equal") + raise ValueError("Length of mean and std must be equal.") for mean_value in mean: check_pos_float32(mean_value) for std_value in std: @@ -88,7 +88,7 @@ def check_normalize_c_param(mean, std): def check_normalize_py_param(mean, std): if len(mean) != len(std): - raise ValueError("Length of mean and std must be equal") + raise ValueError("Length of mean and std must be equal.") for mean_value in mean: check_value(mean_value, [0., 1.], "mean_value") for std_value in std: @@ -372,7 +372,7 @@ def check_num_channels(method): if num_output_channels is not None: if num_output_channels not in (1, 3): raise ValueError("Number of channels of the output grayscale image" - "should be either 1 or 3. Got {0}".format(num_output_channels)) + "should be either 1 or 3. Got {0}.".format(num_output_channels)) return method(self, *args, **kwargs) @@ -471,7 +471,7 @@ def check_linear_transform(method): if transformation_matrix.shape[0] != transformation_matrix.shape[1]: raise ValueError("transformation_matrix should be a square matrix. " - "Got shape {} instead".format(transformation_matrix.shape)) + "Got shape {} instead.".format(transformation_matrix.shape)) if mean_vector.shape[0] != transformation_matrix.shape[0]: raise ValueError("mean_vector length {0} should match either one dimension of the square" "transformation_matrix {1}.".format(mean_vector.shape[0], transformation_matrix.shape)) @@ -556,7 +556,7 @@ def check_uniform_augment_cpp(method): check_positive(num_ops, "num_ops") if num_ops > len(transforms): - raise ValueError("num_ops is greater than transforms list size") + raise ValueError("num_ops is greater than transforms list size.") type_check_list(transforms, (TensorOp,), "tensor_ops") return method(self, *args, **kwargs) @@ -693,11 +693,11 @@ def check_random_solarize(method): type_check(threshold, (tuple,), "threshold") type_check_list(threshold, (int,), "threshold") if len(threshold) != 2: - raise ValueError("threshold must be a sequence of two numbers") + raise ValueError("threshold must be a sequence of two numbers.") for element in threshold: check_value(element, (0, UINT8_MAX)) if threshold[1] < threshold[0]: - raise ValueError("threshold must be in min max format numbers") + raise ValueError("threshold must be in min max format numbers.") return method(self, *args, **kwargs) diff --git a/tests/ut/python/dataset/test_c_compose.py b/tests/ut/python/dataset/test_c_compose.py index 20a45c6e9b..26b3488776 100644 --- a/tests/ut/python/dataset/test_c_compose.py +++ b/tests/ut/python/dataset/test_c_compose.py @@ -41,7 +41,7 @@ def test_compose(): # test one python transform followed by a C transform. type after oneHot is float (mixed use-case) assert test_config([1, 0], [py_ops.OneHotOp(2), ops.TypeCast(mstype.int32)]) == [[[0, 1]], [[1, 0]]] # test exceptions. 
-    assert "op_list[0] is not a c_transform op" in test_config([1, 0], [1, ops.TypeCast(mstype.int32)])
+    assert "op_list[0] is neither a c_transform op" in test_config([1, 0], [1, ops.TypeCast(mstype.int32)])
     # test empty op list
     assert "op_list can not be empty." in test_config([1, 0], [])
 
diff --git a/tests/ut/python/dataset/test_compose.py b/tests/ut/python/dataset/test_compose.py
index 688a9e6081..f7650f6ce3 100644
--- a/tests/ut/python/dataset/test_compose.py
+++ b/tests/ut/python/dataset/test_compose.py
@@ -63,7 +63,7 @@ def test_compose():
     # Test exceptions.
     with pytest.raises(TypeError) as error_info:
         c_transforms.Compose([1, c_transforms.TypeCast(mstype.int32)])
-    assert "op_list[0] is not a c_transform op (TensorOp) nor a callable pyfunc." in str(error_info.value)
+    assert "op_list[0] is neither a c_transform op (TensorOp) nor a callable pyfunc." in str(error_info.value)
 
     # Test empty op list
     with pytest.raises(ValueError) as error_info:
diff --git a/tests/ut/python/dataset/test_datasets_generator.py b/tests/ut/python/dataset/test_datasets_generator.py
index 5e8d185125..057b5b718c 100644
--- a/tests/ut/python/dataset/test_datasets_generator.py
+++ b/tests/ut/python/dataset/test_datasets_generator.py
@@ -510,7 +510,8 @@ def test_generator_error_3():
         for _ in data1:
             pass
-    assert "When (len(input_columns) != len(output_columns)), column_order must be specified." in str(info.value)
+    assert "When length of input_columns and output_columns are not equal, column_order must be specified." in \
+        str(info.value)
 
 
 def test_generator_error_4():
diff --git a/tests/ut/python/dataset/test_minddataset_exception.py b/tests/ut/python/dataset/test_minddataset_exception.py
index c34f47df47..60a9316f4c 100644
--- a/tests/ut/python/dataset/test_minddataset_exception.py
+++ b/tests/ut/python/dataset/test_minddataset_exception.py
@@ -279,7 +279,7 @@ def test_cv_minddataset_partition_num_samples_equals_0():
     with pytest.raises(Exception) as error_info:
         partitions(5)
     try:
-        assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
+        assert 'num_samples should be a positive integer value, but got num_samples: 0.' in str(error_info.value)
     except Exception as error:
         os.remove(CV_FILE_NAME)
         os.remove("{}.db".format(CV_FILE_NAME))
diff --git a/tests/ut/python/dataset/test_normalizeOp.py b/tests/ut/python/dataset/test_normalizeOp.py
index c4290c998d..204e91cb3c 100644
--- a/tests/ut/python/dataset/test_normalizeOp.py
+++ b/tests/ut/python/dataset/test_normalizeOp.py
@@ -242,7 +242,7 @@ def test_normalize_exception_unequal_size_c():
         _ = c_vision.Normalize([100, 250, 125], [50, 50, 75, 75])
     except ValueError as e:
         logger.info("Got an exception in DE: {}".format(str(e)))
-        assert str(e) == "Length of mean and std must be equal"
+        assert str(e) == "Length of mean and std must be equal."
 
 
 def test_normalize_exception_unequal_size_py():
@@ -255,7 +255,7 @@ def test_normalize_exception_unequal_size_py():
         _ = py_vision.Normalize([0.50, 0.30, 0.75], [0.18, 0.32, 0.71, 0.72])
     except ValueError as e:
         logger.info("Got an exception in DE: {}".format(str(e)))
-        assert str(e) == "Length of mean and std must be equal"
+        assert str(e) == "Length of mean and std must be equal."
 
 
 def test_normalize_exception_invalid_size_py():
diff --git a/tests/ut/python/dataset/test_paddeddataset.py b/tests/ut/python/dataset/test_paddeddataset.py
index 3de4b50833..0345bf2e72 100644
--- a/tests/ut/python/dataset/test_paddeddataset.py
+++ b/tests/ut/python/dataset/test_paddeddataset.py
@@ -483,7 +483,7 @@ def test_clue_padded_and_skip_with_0_samples():
         count += 1
     assert count == 0
 
-    with pytest.raises(ValueError, match="There is no samples in the "):
+    with pytest.raises(ValueError, match="There are no samples in the "):
         dataset = dataset.concat(data_copy1)
         count = 0
         for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
diff --git a/tests/ut/python/dataset/test_random_select_subpolicy.py b/tests/ut/python/dataset/test_random_select_subpolicy.py
index 919f3cc81c..7ad4a096e7 100644
--- a/tests/ut/python/dataset/test_random_select_subpolicy.py
+++ b/tests/ut/python/dataset/test_random_select_subpolicy.py
@@ -41,8 +41,8 @@ def test_random_select_subpolicy():
     # test exceptions
     assert "policy can not be empty." in test_config([[1, 2, 3]], [])
     assert "policy[0] can not be empty." in test_config([[1, 2, 3]], [[]])
-    assert "op of (op, prob) in policy[1][0] is not a c_transform op (TensorOp) nor a callable pyfunc" in test_config(
-        [[1, 2, 3]], [[(ops.PadEnd([4], 0), 0.5)], [(1, 0.4)]])
+    assert "op of (op, prob) in policy[1][0] is neither a c_transform op (TensorOp) nor a callable pyfunc" \
+        in test_config([[1, 2, 3]], [[(ops.PadEnd([4], 0), 0.5)], [(1, 0.4)]])
     assert "prob of (op, prob) policy[1][0] is not within the required interval of (0 to 1)" in test_config([[1]], [
         [(ops.Duplicate(), 0)], [(ops.Duplicate(), -0.1)]])