modify return description in comments

pull/11379/head
Xiao Tianci 4 years ago
parent cf87c0304d
commit f7093efe6a

@ -102,7 +102,7 @@ def get_seed():
Get the seed.
Returns:
Int, seed.
int, seed.
"""
return _config.get_seed()
@ -131,7 +131,7 @@ def get_prefetch_size():
Get the prefetch size in number of rows.
Returns:
Size, total number of rows to be prefetched.
int, total number of rows to be prefetched.
"""
return _config.get_op_connector_size()
@ -162,7 +162,7 @@ def get_num_parallel_workers():
This is the DEFAULT num_parallel_workers value used for each op; it is not related to the AutoNumWorker feature.
Returns:
Int, number of parallel workers to be used as a default for each operation
int, number of parallel workers to be used as a default for each operation.
"""
return _config.get_num_parallel_workers()
@ -193,7 +193,7 @@ def get_numa_enable():
This is the DEFAULT numa enabled value used for all processes.
Returns:
boolean, the default state of numa enabled
bool, the default state of numa enabled.
"""
return _config.get_numa_enable()
@ -222,7 +222,7 @@ def get_monitor_sampling_interval():
Get the default interval of performance monitor sampling.
Returns:
Int, interval (in milliseconds) for performance monitor sampling.
int, interval (in milliseconds) for performance monitor sampling.
"""
return _config.get_monitor_sampling_interval()
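For reference, a minimal sketch of reading these global configuration values (assuming `mindspore.dataset` is imported as `ds`; the returned values depend on the current configuration):
>>> import mindspore.dataset as ds
>>> seed = ds.config.get_seed()                           # int
>>> prefetch = ds.config.get_prefetch_size()              # int, rows to prefetch
>>> workers = ds.config.get_num_parallel_workers()        # int, default per-op workers
>>> numa = ds.config.get_numa_enable()                    # bool
>>> interval = ds.config.get_monitor_sampling_interval()  # int, milliseconds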
@ -280,7 +280,8 @@ def get_auto_num_workers():
Get whether the automatic number of workers feature is turned on or off.
Returns:
Bool, whether auto num worker feature is turned on
bool, whether auto num worker feature is turned on.
Examples:
>>> num_workers = ds.config.get_auto_num_workers()
"""
@ -313,7 +314,7 @@ def get_callback_timeout():
In case of a deadlock, the wait function will exit after the timeout period.
Returns:
Int, the duration in seconds
int, the duration in seconds.
"""
return _config.get_callback_timeout()
@ -323,7 +324,7 @@ def __str__():
String representation of the configurations.
Returns:
Str, configurations.
str, configurations.
"""
return str(_config)

File diff suppressed because it is too large

@ -100,7 +100,7 @@ class GraphData:
node_type (int): Specify the type of node.
Returns:
numpy.ndarray: Array of nodes.
numpy.ndarray, array of nodes.
Examples:
>>> import mindspore.dataset as ds
@ -124,7 +124,7 @@ class GraphData:
edge_type (int): Specify the type of edge.
Returns:
numpy.ndarray: array of edges.
numpy.ndarray, array of edges.
Examples:
>>> import mindspore.dataset as ds
@ -148,7 +148,7 @@ class GraphData:
edge_list (Union[list, numpy.ndarray]): The given list of edges.
Returns:
numpy.ndarray: Array of nodes.
numpy.ndarray, array of nodes.
Raises:
TypeError: If `edge_list` is not list or ndarray.
@ -167,7 +167,7 @@ class GraphData:
neighbor_type (int): Specify the type of neighbor.
Returns:
numpy.ndarray: Array of nodes.
numpy.ndarray, array of neighbors.
Examples:
>>> import mindspore.dataset as ds
@ -201,7 +201,7 @@ class GraphData:
neighbor_types (Union[list, numpy.ndarray]): Neighbor type sampled per hop.
Returns:
numpy.ndarray: Array of nodes.
numpy.ndarray, array of neighbors.
Examples:
>>> import mindspore.dataset as ds
@ -231,7 +231,7 @@ class GraphData:
neg_neighbor_type (int): Specify the type of negative neighbor.
Returns:
numpy.ndarray: Array of nodes.
numpy.ndarray, array of neighbors.
Examples:
>>> import mindspore.dataset as ds
@ -260,7 +260,7 @@ class GraphData:
feature_types (Union[list, numpy.ndarray]): The given list of feature types.
Returns:
numpy.ndarray: array of features.
numpy.ndarray, array of features.
Examples:
>>> import mindspore.dataset as ds
@ -292,7 +292,7 @@ class GraphData:
feature_types (Union[list, numpy.ndarray]): The given list of feature types.
Returns:
numpy.ndarray: array of features.
numpy.ndarray, array of features.
Examples:
>>> import mindspore.dataset as ds
@ -320,7 +320,7 @@ class GraphData:
the feature information of nodes, the number of edges, the type of edges, and the feature information of edges.
Returns:
dict: Meta information of the graph. The key is node_type, edge_type, node_num, edge_num,
dict, meta information of the graph. The key is node_type, edge_type, node_num, edge_num,
node_feature_type and edge_feature_type.
"""
if self._working_mode == 'server':
@ -347,7 +347,7 @@ class GraphData:
A default value of -1 indicates that no node is given.
Returns:
numpy.ndarray: Array of nodes.
numpy.ndarray, array of nodes.
Examples:
>>> import mindspore.dataset as ds
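Taken together, a hedged sketch of the query flow these docstrings describe (the dataset path and the node/neighbor/feature type ids are hypothetical placeholders):
>>> import mindspore.dataset as ds
>>> graph = ds.GraphData("/path/to/graph_dataset_file", num_parallel_workers=2)
>>> nodes = graph.get_all_nodes(node_type=1)                     # numpy.ndarray of node ids
>>> neighbors = graph.get_all_neighbors(nodes.tolist(), neighbor_type=2)
>>> features = graph.get_node_feature(nodes.tolist(), feature_types=[1])
>>> info = graph.graph_info()                                    # dict: node_type, edge_type, node_num, ...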

@ -128,6 +128,7 @@ class BuiltinSampler:
Users should not extend this class.
"""
def __init__(self, num_samples=None):
self.child_sampler = None
self.num_samples = num_samples
@ -201,7 +202,7 @@ class BuiltinSampler:
- None
Returns:
int, The number of samples, or None
int, the number of samples, or None
"""
if self.child_sampler is not None:
child_samples = self.child_sampler.get_num_samples()
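A small sketch of inspecting the resolved sample count (RandomSampler is used here only as a convenient built-in sampler):
>>> import mindspore.dataset as ds
>>> sampler = ds.RandomSampler(num_samples=64)
>>> sampler.get_num_samples()   # 64, the value set at construction
When a child sampler is attached with add_child(), the returned count also takes the child's num_samples into account, as the docstring above describes.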

@ -1063,8 +1063,9 @@ class LinearTransformation:
the dot product with the transformation matrix, and reshapes it back to its original shape.
Args:
transformation_matrix (numpy.ndarray): a square transformation matrix of shape (D, D), D = C x H x W.
mean_vector (numpy.ndarray): a NumPy ndarray of shape (D,) where D = C x H x W.
transformation_matrix (numpy.ndarray): a square transformation matrix of shape (D, D), where
:math:`D = C \times H \times W`.
mean_vector (numpy.ndarray): a NumPy ndarray of shape (D,) where :math:`D = C \times H \times W`.
Examples:
>>> from mindspore.dataset.transforms.py_transforms import Compose
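A minimal construction sketch, assuming 3 x 32 x 32 images so that D = C x H x W = 3072, and assuming LinearTransformation is available from mindspore.dataset.vision.py_transforms (imported as py_vision here); the identity matrix and zero mean vector are placeholders for a real whitening matrix:
>>> import numpy as np
>>> from mindspore.dataset.transforms.py_transforms import Compose
>>> import mindspore.dataset.vision.py_transforms as py_vision
>>> dim = 3 * 32 * 32                                        # D = C x H x W
>>> transformation_matrix = np.eye(dim, dtype=np.float32)    # square (D, D) matrix
>>> mean_vector = np.zeros(dim, dtype=np.float32)            # shape (D,)
>>> transform = Compose([py_vision.ToTensor(),
...                      py_vision.LinearTransformation(transformation_matrix, mean_vector)])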

@ -23,7 +23,7 @@ from PIL import Image
import mindspore as ms
import mindspore.dataset as ds
from mindspore import log
from mindspore.dataset.engine.datasets import Dataset
from mindspore.dataset import Dataset
from mindspore.nn import Cell, SequentialCell
from mindspore.ops.operations import ExpandDims
from mindspore.train._utils import check_value_type

@ -30,6 +30,7 @@ from .common.exceptions import ParamValueError, ParamTypeError, MRMInvalidSchema
__all__ = ['FileWriter']
class FileWriter:
"""
Class to write user defined raw data into MindRecord File series.
@ -45,6 +46,7 @@ class FileWriter:
Raises:
ParamValueError: If `file_name` or `shard_num` is invalid.
"""
def __init__(self, file_name, shard_num=1):
check_filename(file_name)
self._file_name = file_name
@ -84,7 +86,7 @@ class FileWriter:
file_name (str): String of MindRecord file name.
Returns:
Instance of FileWriter.
FileWriter, file writer for the opened MindRecord file.
Raises:
ParamValueError: If file_name is invalid.
@ -118,7 +120,7 @@ class FileWriter:
desc (str, optional): String of schema description (default=None).
Returns:
An integer, schema id.
int, schema id.
Raises:
MRMInvalidSchemaError: If schema is invalid.
@ -175,17 +177,17 @@ class FileWriter:
if field not in v:
error_data_dic[i] = "for schema, {} th data is wrong, " \
"there is not '{}' object in the raw data.".format(i, field)
"there is not '{}' object in the raw data.".format(i, field)
continue
field_type = type(v[field]).__name__
if field_type not in VALUE_TYPE_MAP:
error_data_dic[i] = "for schema, {} th data is wrong, " \
"data type for '{}' is not matched.".format(i, field)
"data type for '{}' is not matched.".format(i, field)
continue
if schema_content[field]["type"] not in VALUE_TYPE_MAP[field_type]:
error_data_dic[i] = "for schema, {} th data is wrong, " \
"data type for '{}' is not matched.".format(i, field)
"data type for '{}' is not matched.".format(i, field)
continue
if field_type == 'ndarray':
@ -206,7 +208,6 @@ class FileWriter:
def open_and_set_header(self):
"""
Open writer and set header.
"""
if not self._writer.is_open:
self._writer.open(self._paths)
@ -222,6 +223,9 @@ class FileWriter:
raw_data (list[dict]): List of raw data.
parallel_writer (bool, optional): Write the raw data in parallel if True (default=False).
Returns:
MSRStatus, SUCCESS or FAILED.
Raises:
ParamTypeError: If index field is invalid.
MRMOpenError: If failed to open MindRecord File.
@ -330,7 +334,7 @@ class FileWriter:
v (dict): Sub dict in schema
Returns:
bool, True or False.
bool, whether the array item is valid.
str, error message.
"""
if v['type'] not in VALID_ARRAY_ATTRIBUTES:
@ -355,7 +359,7 @@ class FileWriter:
content (dict): Dict of raw schema.
Returns:
bool, True or False.
bool, whether the schema is valid.
str, error message.
"""
error = ''
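Putting these pieces together, a hedged sketch of the typical write flow (the output path, schema and sample record are hypothetical):
>>> from mindspore.mindrecord import FileWriter
>>> schema = {"file_name": {"type": "string"},
...           "label": {"type": "int64"},
...           "data": {"type": "bytes"}}
>>> writer = FileWriter(file_name="/path/to/output.mindrecord", shard_num=1)
>>> schema_id = writer.add_schema(schema, "example_schema")       # int, schema id
>>> writer.add_index(["file_name", "label"])                      # optional index fields
>>> writer.write_raw_data([{"file_name": "001.jpg", "label": 3, "data": b"\x10\x20"}])
>>> writer.commit()                                               # MSRStatus, SUCCESS or FAILED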

@ -23,6 +23,7 @@ from .common.exceptions import ParamValueError, ParamTypeError, MRMDefineCategor
__all__ = ['MindPage']
class MindPage:
"""
Class to read MindRecord File series in pagination.
@ -36,6 +37,7 @@ class MindPage:
ParamValueError: If `file_name`, `num_consumer` or columns is invalid.
MRMInitSegmentError: If failed to initialize ShardSegment.
"""
def __init__(self, file_name, num_consumer=4):
if isinstance(file_name, list):
for f in file_name:
@ -69,7 +71,12 @@ class MindPage:
return self._candidate_fields
def get_category_fields(self):
"""Return candidate category fields."""
"""
Return candidate category fields.
Returns:
list[str], by which data could be grouped.
"""
logger.warning("WARN_DEPRECATED: The usage of get_category_fields is deprecated."
" Please use candidate_fields")
return self.candidate_fields
@ -97,12 +104,22 @@ class MindPage:
@property
def category_field(self):
"""Getter function for category fields."""
"""
Getter function for category fields.
Returns:
list[str], by which data could be grouped.
"""
return self._category_field
@category_field.setter
def category_field(self, category_field):
"""Setter function for category field"""
"""
Setter function for category field.
Returns:
MSRStatus, SUCCESS or FAILED.
"""
if not category_field or not isinstance(category_field, str):
raise ParamTypeError('category_fields', 'str')
if category_field not in self._candidate_fields:
@ -132,7 +149,7 @@ class MindPage:
num_row (int): Number of rows in a page.
Returns:
List, list[dict].
list[dict], data queried by category id.
Raises:
ParamValueError: If any parameter is invalid.
@ -158,7 +175,7 @@ class MindPage:
num_row (int): Number of row in a page.
Returns:
str, read at page.
list[dict], data queried by category name.
"""
if not isinstance(category_name, str):
raise ParamValueError("Category name should be str.")
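For context, a minimal sketch of the pagination workflow (the file path and category values are hypothetical):
>>> from mindspore.mindrecord import MindPage
>>> reader = MindPage("/path/to/output.mindrecord")
>>> reader.candidate_fields                          # list[str], fields data can be grouped by
>>> reader.category_field = "label"                  # choose the grouping field
>>> info = reader.read_category_info()               # summary of the available categories
>>> rows = reader.read_at_page_by_name("3", 0, 16)   # list[dict], page 0 with 16 rows of category "3"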

@ -23,6 +23,7 @@ from .common.exceptions import MRMOpenError, MRMOpenForAppendError, MRMInvalidHe
__all__ = ['ShardWriter']
class ShardWriter:
"""
Wrapper class which represents the shardWrite class in the C++ module.
@ -192,9 +193,11 @@ class ShardWriter:
if len(blob_data) == 1:
values = [v for v in blob_data.values()]
return bytes(values[0])
# convert int to bytes
def int_to_bytes(x: int) -> bytes:
return x.to_bytes(8, 'big')
merged = bytes()
for field, v in blob_data.items():
# convert ndarray to bytes
@ -209,7 +212,7 @@ class ShardWriter:
Flush data to disk.
Returns:
Class MSRStatus, SUCCESS or FAILED.
MSRStatus, SUCCESS or FAILED.
Raises:
MRMCommitError: If failed to flush data to disk.

@ -33,6 +33,7 @@ except ModuleNotFoundError:
__all__ = ['Cifar100ToMR']
class Cifar100ToMR:
"""
A class to transform from cifar100 to MindRecord.
@ -44,6 +45,7 @@ class Cifar100ToMR:
Raises:
ValueError: If source or destination is invalid.
"""
def __init__(self, source, destination):
check_filename(source)
self.source = source
@ -74,7 +76,7 @@ class Cifar100ToMR:
fields (list[str]): A list of index fields, e.g. ["fine_label", "coarse_label"].
Returns:
SUCCESS or FAILED, whether cifar100 is successfully transformed to MindRecord.
MSRStatus, whether cifar100 is successfully transformed to MindRecord.
"""
if fields and not isinstance(fields, list):
raise ValueError("The parameter fields should be None or list")
@ -114,6 +116,7 @@ class Cifar100ToMR:
raise t.exception
return t.res
def _construct_raw_data(images, fine_labels, coarse_labels):
"""
Construct raw data from cifar100 data.
@ -124,7 +127,7 @@ def _construct_raw_data(images, fine_labels, coarse_labels):
coarse_labels (list): coarse label list from cifar100.
Returns:
SUCCESS/FAILED, whether successfully written into MindRecord.
list[dict], data dictionary constructed from cifar100.
"""
if not cv2:
raise ModuleNotFoundError("opencv-python module not found, please use pip to install it.")
@ -141,6 +144,7 @@ def _construct_raw_data(images, fine_labels, coarse_labels):
raw_data.append(row_data)
return raw_data
def _generate_mindrecord(file_name, raw_data, fields, schema_desc):
"""
Generate MindRecord file from raw data.
@ -153,7 +157,7 @@ def _generate_mindrecord(file_name, raw_data, fields, schema_desc):
schema_desc (str): String of schema description.
Returns:
SUCCESS/FAILED, whether successfully written into MindRecord.
MSRStatus, whether successfully written into MindRecord.
"""
schema = {"id": {"type": "int64"}, "fine_label": {"type": "int64"},
"coarse_label": {"type": "int64"}, "data": {"type": "bytes"}}

@ -25,6 +25,7 @@ from .cifar10 import Cifar10
from ..common.exceptions import PathNotExistsError
from ..filewriter import FileWriter
from ..shardutils import check_filename, ExceptionThread, SUCCESS, FAILED
try:
cv2 = import_module("cv2")
except ModuleNotFoundError:
@ -32,6 +33,7 @@ except ModuleNotFoundError:
__all__ = ['Cifar10ToMR']
class Cifar10ToMR:
"""
A class to transform from cifar10 to MindRecord.
@ -43,6 +45,7 @@ class Cifar10ToMR:
Raises:
ValueError: If source or destination is invalid.
"""
def __init__(self, source, destination):
check_filename(source)
self.source = source
@ -73,7 +76,7 @@ class Cifar10ToMR:
fields (list[str], optional): A list of index fields, e.g.["label"] (default=None).
Returns:
SUCCESS or FAILED, whether cifar10 is successfully transformed to MindRecord.
MSRStatus, whether cifar10 is successfully transformed to MindRecord.
"""
if fields and not isinstance(fields, list):
raise ValueError("The parameter fields should be None or list")
@ -109,6 +112,7 @@ class Cifar10ToMR:
raise t.exception
return t.res
def _construct_raw_data(images, labels):
"""
Construct raw data from cifar10 data.
@ -118,7 +122,7 @@ def _construct_raw_data(images, labels):
labels (list): label list from cifar10.
Returns:
SUCCESS/FAILED, whether successfully written into MindRecord.
list[dict], data dictionary constructed from cifar10.
"""
if not cv2:
raise ModuleNotFoundError("opencv-python module not found, please use pip to install it.")
@ -133,6 +137,7 @@ def _construct_raw_data(images, labels):
raw_data.append(row_data)
return raw_data
def _generate_mindrecord(file_name, raw_data, fields, schema_desc):
"""
Generate MindRecord file from raw data.
@ -145,7 +150,7 @@ def _generate_mindrecord(file_name, raw_data, fields, schema_desc):
schema_desc (str): String of schema description.
Returns:
SUCCESS/FAILED, whether successfully written into MindRecord.
MSRStatus, whether successfully written into MindRecord.
"""
schema = {"id": {"type": "int64"}, "label": {"type": "int64"},
"data": {"type": "bytes"}}

@ -29,6 +29,7 @@ except ModuleNotFoundError:
__all__ = ['CsvToMR']
class CsvToMR:
"""
A class to transform from csv to MindRecord.
@ -121,7 +122,7 @@ class CsvToMR:
Executes transformation from csv to MindRecord.
Returns:
SUCCESS or FAILED, whether csv is successfully transformed to MindRecord.
MSRStatus, whether csv is successfully transformed to MindRecord.
"""
if not os.path.exists(self.source):
raise IOError("Csv file {} do not exist.".format(self.source))

@ -47,6 +47,7 @@ class ImageNetToMR:
Raises:
ValueError: If `map_file`, `image_dir` or `destination` is invalid.
"""
def __init__(self, map_file, image_dir, destination, partition_number=1):
check_filename(map_file)
self.map_file = map_file
@ -122,7 +123,7 @@ class ImageNetToMR:
Executes transformation from imagenet to MindRecord.
Returns:
SUCCESS or FAILED, whether imagenet is successfully transformed to MindRecord.
MSRStatus, whether imagenet is successfully transformed to MindRecord.
"""
t0_total = time.time()
@ -133,10 +134,10 @@ class ImageNetToMR:
logger.info("transformed MindRecord schema is: {}".format(imagenet_schema_json))
# set the header size
self.writer.set_header_size(1<<24)
self.writer.set_header_size(1 << 24)
# set the page size
self.writer.set_page_size(1<<26)
self.writer.set_page_size(1 << 26)
# create the schema
self.writer.add_schema(imagenet_schema_json, "imagenet_schema")
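The two shifted values set the MindRecord header and page sizes: 1 << 24 is 16 MB and 1 << 26 is 64 MB. A short sketch of the same calls on a standalone FileWriter (the output path is a placeholder):
>>> from mindspore.mindrecord import FileWriter
>>> writer = FileWriter("/path/to/imagenet.mindrecord", shard_num=8)
>>> writer.set_header_size(1 << 24)   # 16 MB header, room for a large schema and index
>>> writer.set_page_size(1 << 26)     # 64 MB pages, suited to large image records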

@ -32,6 +32,7 @@ except ModuleNotFoundError:
__all__ = ['MnistToMR']
class MnistToMR:
"""
A class to transform from Mnist to MindRecord.
@ -125,7 +126,7 @@ class MnistToMR:
Executes transformation from Mnist train part to MindRecord.
Returns:
SUCCESS/FAILED, whether successfully written into MindRecord.
MSRStatus, whether successfully written into MindRecord.
"""
t0_total = time.time()
@ -173,7 +174,7 @@ class MnistToMR:
Executes transformation from Mnist test part to MindRecord.
Returns:
SUCCESS or FAILED, whether Mnist is successfully transformed to MindRecord.
MSRStatus, whether Mnist is successfully transformed to MindRecord.
"""
t0_total = time.time()
@ -222,7 +223,7 @@ class MnistToMR:
Executes transformation from Mnist to MindRecord.
Returns:
SUCCESS/FAILED, whether successfully written into MindRecord.
MSRStatus, whether successfully written into MindRecord.
"""
if not cv2:
raise ModuleNotFoundError("opencv-python module not found, please use pip to install it.")

@ -23,7 +23,6 @@ from mindspore import log as logger
from ..filewriter import FileWriter
from ..shardutils import check_filename, ExceptionThread
__all__ = ['TFRecordToMR']
SupportedTensorFlowVersion = '1.13.0-rc1'
@ -86,9 +85,10 @@ class TFRecordToMR:
ValueError: If parameter is invalid.
Exception: When the tensorflow module is not found or the version is not correct.
"""
def __init__(self, source, destination, feature_dict, bytes_fields=None):
try:
self.tf = import_module("tensorflow") # just used to convert tfrecord to mindrecord
self.tf = import_module("tensorflow") # just used to convert tfrecord to mindrecord
except ModuleNotFoundError:
self.tf = None
if not self.tf:
@ -265,7 +265,7 @@ class TFRecordToMR:
Execute transformation from TFRecord to MindRecord.
Returns:
SUCCESS or FAILED, whether TFRecord is successfully transformed to MindRecord.
MSRStatus, whether TFRecord is successfully transformed to MindRecord.
"""
writer = FileWriter(self.destination)
logger.info("Transformed MindRecord schema is: {}, TFRecord feature dict is: {}"

@ -22,13 +22,12 @@ import pickle
import numpy as np
import pandas as pd
from mindspore.dataset.engine import GeneratorDataset
from mindspore.dataset import GeneratorDataset
import src.constants as rconst
import src.movielens as movielens
import src.stat_utils as stat_utils
DATASET_TO_NUM_USERS_AND_ITEMS = {
"ml-1m": (6040, 3706),
"ml-20m": (138493, 26744)
@ -205,6 +204,7 @@ class NCFDataset:
"""
A dataset for NCF network.
"""
def __init__(self,
pos_users,
pos_items,
@ -407,6 +407,7 @@ class RandomSampler:
"""
A random sampler for dataset.
"""
def __init__(self, pos_count, num_train_negatives, batch_size):
self.pos_count = pos_count
self._num_samples = (1 + num_train_negatives) * self.pos_count
@ -433,6 +434,7 @@ class DistributedSamplerOfTrain:
"""
A distributed sampler for dataset.
"""
def __init__(self, pos_count, num_train_negatives, batch_size, rank_id, rank_size):
"""
Distributed sampler of training dataset.
@ -443,15 +445,16 @@ class DistributedSamplerOfTrain:
self._batch_size = batch_size
self._batchs_per_rank = int(math.ceil(self._num_samples / self._batch_size / rank_size))
self._samples_per_rank = int(math.ceil(self._batchs_per_rank * self._batch_size))
self._samples_per_rank = int(math.ceil(self._batchs_per_rank * self._batch_size))
self._total_num_samples = self._samples_per_rank * self._rank_size
def __iter__(self):
"""
Returns the data after each sampling.
"""
indices = stat_utils.permutation((self._num_samples, stat_utils.random_int32()))
indices = indices.tolist()
indices.extend(indices[:self._total_num_samples-len(indices)])
indices.extend(indices[:self._total_num_samples - len(indices)])
indices = indices[self._rank_id:self._total_num_samples:self._rank_size]
batch_indices = [indices[x * self._batch_size:(x + 1) * self._batch_size] for x in range(self._batchs_per_rank)]
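The rank striding in __iter__ above can be seen in isolation; a plain-Python sketch (not the class itself), with 12 samples, 2 ranks and a batch size of 3:
>>> total, rank_size, batch_size = 12, 2, 3
>>> indices = list(range(total))
>>> rank0 = indices[0:total:rank_size]   # [0, 2, 4, 6, 8, 10]
>>> rank1 = indices[1:total:rank_size]   # [1, 3, 5, 7, 9, 11]
>>> [rank0[x * batch_size:(x + 1) * batch_size] for x in range(len(rank0) // batch_size)]
[[0, 2, 4], [6, 8, 10]]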
@ -463,10 +466,12 @@ class DistributedSamplerOfTrain:
"""
return self._batchs_per_rank
class SequenceSampler:
"""
A sequence sampler for dataset.
"""
def __init__(self, eval_batch_size, num_users):
self._eval_users_per_batch = int(
eval_batch_size // (1 + rconst.NUM_EVAL_NEGATIVES))
@ -491,10 +496,12 @@ class SequenceSampler:
"""
return self._eval_batches_per_epoch
class DistributedSamplerOfEval:
"""
A distributed sampler for eval dataset.
"""
def __init__(self, eval_batch_size, num_users, rank_id, rank_size):
self._eval_users_per_batch = int(
eval_batch_size // (1 + rconst.NUM_EVAL_NEGATIVES))
@ -507,8 +514,8 @@ class DistributedSamplerOfEval:
self._eval_batch_size = eval_batch_size
self._batchs_per_rank = int(math.ceil(self._eval_batches_per_epoch / rank_size))
#self._samples_per_rank = int(math.ceil(self._batchs_per_rank * self._eval_batch_size))
#self._total_num_samples = self._samples_per_rank * self._rank_size
# self._samples_per_rank = int(math.ceil(self._batchs_per_rank * self._eval_batch_size))
# self._total_num_samples = self._samples_per_rank * self._rank_size
def __iter__(self):
indices = [(x * self._eval_users_per_batch, (x + self._rank_id + 1) * self._eval_users_per_batch)
@ -525,6 +532,7 @@ class DistributedSamplerOfEval:
def __len__(self):
return self._batchs_per_rank
def parse_eval_batch_size(eval_batch_size):
"""
Parse eval batch size.
