switch input columns and operation

change ImageFolderDatasetV2 name

change ds.transforms.vision to ds.vision

change batch api to match map api more closely

compose op changes

test_pylint

remove compose op from vision, move to transform module, refactor map and batch to use column_order
pull/5384/head
nhussain 5 years ago
parent 75045e3e2a
commit 3bac9d3713
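
Taken together, the commit messages above describe an API reshuffle: ImageFolderDatasetV2 becomes ImageFolderDataset, the vision transforms move from mindspore.dataset.transforms.vision to mindspore.dataset.vision, and map() now takes operations first with columns_order renamed to column_order. A minimal sketch of the new calling convention, assuming the keyword forms shown in the updated docstrings below (the directory path is a placeholder):

import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision  # previously mindspore.dataset.transforms.vision.c_transforms

# ImageFolderDatasetV2 is renamed to ImageFolderDataset
data = ds.ImageFolderDataset("path/to/imagefolder_directory", num_parallel_workers=8)

# operations is now the leading argument of map(); columns_order is renamed to column_order
data = data.map(operations=[c_vision.Decode(), c_vision.Resize((256, 256))],
                input_columns=["image"],
                output_columns=["image"],
                column_order=["image", "label"])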

@ -733,7 +733,7 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr<DatasetOp> *
(void)map_builder.SetInColNames(in_col_names);
} else if (key == "output_columns") {
(void)map_builder.SetOutColNames(ToStringVector(value));
} else if (key == "columns_order") {
} else if (key == "column_order") {
project_columns = ToStringVector(value);
} else if (key == "num_parallel_workers") {
num_workers = ToInt(value);

@ -113,7 +113,7 @@ Status ImageFolderOp::PrescanMasterEntry(const std::string &filedir) {
num_rows_ = image_label_pairs_.size();
if (num_rows_ == 0) {
RETURN_STATUS_UNEXPECTED(
"There is no valid data matching the dataset API ImageFolderDatasetV2.Please check file path or dataset "
"There is no valid data matching the dataset API ImageFolderDataset. Please check file path or dataset "
"API validation first.");
}
// free memory of two queues used for pre-scan

@ -111,7 +111,7 @@ constexpr char kWhitespaceTokenizerOp[] = "WhitespaceTokenizerOp";
constexpr char kWordpieceTokenizerOp[] = "WordpieceTokenizerOp";
constexpr char kRandomChoiceOp[] = "RandomChoiceOp";
constexpr char kRandomApplyOp[] = "RandomApplyOp";
constexpr char kComposeOp[] = "ComposeOp";
constexpr char kComposeOp[] = "Compose";
constexpr char kRandomSelectSubpolicyOp[] = "RandomSelectSubpolicyOp";
constexpr char kSentencepieceTokenizerOp[] = "SentencepieceTokenizerOp";

@ -19,7 +19,7 @@ can also create samplers with this module to sample data.
"""
from .core import config
from .engine.datasets import TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, NumpySlicesDataset, \
from .engine.datasets import TFRecordDataset, ImageFolderDataset, MnistDataset, MindDataset, NumpySlicesDataset, \
GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CocoDataset, CelebADataset, \
TextFileDataset, CLUEDataset, CSVDataset, Schema, Shuffle, zip, RandomDataset, PaddedDataset
from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, SequentialSampler, SubsetRandomSampler, \
@ -28,7 +28,7 @@ from .engine.cache_client import DatasetCache
from .engine.serializer_deserializer import serialize, deserialize, show
from .engine.graphdata import GraphData
__all__ = ["config", "ImageFolderDatasetV2", "MnistDataset", "PaddedDataset",
__all__ = ["config", "ImageFolderDataset", "MnistDataset", "PaddedDataset",
"MindDataset", "GeneratorDataset", "TFRecordDataset",
"ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", "NumpySlicesDataset", "VOCDataset",
"CocoDataset", "TextFileDataset", "CLUEDataset", "CSVDataset", "Schema", "DistributedSampler", "PKSampler",

@ -0,0 +1,31 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
General py_transforms_utils functions.
"""
import numpy as np
def is_numpy(img):
"""
Check if the input image is Numpy format.
Args:
img: Image to be checked.
Returns:
Bool, True if input is Numpy image.
"""
return isinstance(img, np.ndarray)

@ -28,7 +28,7 @@ from .serializer_deserializer import serialize, deserialize, show, compare
from .samplers import *
from ..core import config
__all__ = ["config", "zip", "ImageFolderDatasetV2", "MnistDataset",
__all__ = ["config", "zip", "ImageFolderDataset", "MnistDataset",
"MindDataset", "GeneratorDataset", "TFRecordDataset", "CLUEDataset", "CSVDataset",
"ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset",
"VOCDataset", "CocoDataset", "TextFileDataset", "Schema", "DistributedSampler",

File diff suppressed because it is too large.

@ -150,7 +150,7 @@ class Iterator:
op_type = OpName.SKIP
elif isinstance(dataset, de.TakeDataset):
op_type = OpName.TAKE
elif isinstance(dataset, de.ImageFolderDatasetV2):
elif isinstance(dataset, de.ImageFolderDataset):
op_type = OpName.IMAGEFOLDER
elif isinstance(dataset, de.GeneratorDataset):
op_type = OpName.GENERATOR

@ -41,7 +41,7 @@ class Sampler:
>>> for i in range(self.dataset_size - 1, -1, -1):
>>> yield i
>>>
>>> ds = ds.ImageFolderDatasetV2(path, sampler=ReverseSampler())
>>> ds = ds.ImageFolderDataset(path, sampler=ReverseSampler())
"""
def __init__(self, num_samples=None):
@ -232,7 +232,7 @@ class DistributedSampler(BuiltinSampler):
>>>
>>> # creates a distributed sampler with 10 shards total. This shard is shard 5
>>> sampler = ds.DistributedSampler(10, 5)
>>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
>>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
Raises:
ValueError: If num_shards is not positive.
@ -315,7 +315,7 @@ class PKSampler(BuiltinSampler):
>>>
>>> # creates a PKSampler that will get 3 samples from every class.
>>> sampler = ds.PKSampler(3)
>>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
>>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
Raises:
ValueError: If num_val is not positive.
@ -387,7 +387,7 @@ class RandomSampler(BuiltinSampler):
>>>
>>> # creates a RandomSampler
>>> sampler = ds.RandomSampler()
>>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
>>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
Raises:
ValueError: If replacement is not boolean.
@ -447,7 +447,7 @@ class SequentialSampler(BuiltinSampler):
>>>
>>> # creates a SequentialSampler
>>> sampler = ds.SequentialSampler()
>>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
>>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
"""
def __init__(self, start_index=None, num_samples=None):
@ -510,7 +510,7 @@ class SubsetRandomSampler(BuiltinSampler):
>>>
>>> # creates a SubsetRandomSampler, will sample from the provided indices
>>> sampler = ds.SubsetRandomSampler()
>>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
>>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
"""
def __init__(self, indices, num_samples=None):
@ -573,7 +573,7 @@ class WeightedRandomSampler(BuiltinSampler):
>>>
>>> # creates a WeightedRandomSampler that will sample 4 elements without replacement
>>> sampler = ds.WeightedRandomSampler(weights, 4)
>>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
>>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
Raises:
ValueError: If num_samples is not positive.

@ -21,9 +21,10 @@ import sys
from mindspore import log as logger
from . import datasets as de
from ..transforms.vision.utils import Inter, Border
from ..vision.utils import Inter, Border
from ..core import config
def serialize(dataset, json_filepath=None):
"""
Serialize dataset pipeline into a json file.
@ -44,7 +45,7 @@ def serialize(dataset, json_filepath=None):
>>> DATA_DIR = "../../data/testMnistData"
>>> data = ds.MnistDataset(DATA_DIR, 100)
>>> one_hot_encode = C.OneHot(10) # num_classes is input argument
>>> data = data.map(input_column_names="label", operation=one_hot_encode)
>>> data = data.map(operation=one_hot_encode, input_column_names="label")
>>> data = data.batch(batch_size=10, drop_remainder=True)
>>>
>>> ds.engine.serialize(data, json_filepath="mnist_dataset_pipeline.json") # serialize it to json file
@ -77,7 +78,7 @@ def deserialize(input_dict=None, json_filepath=None):
>>> DATA_DIR = "../../data/testMnistData"
>>> data = ds.MnistDataset(DATA_DIR, 100)
>>> one_hot_encode = C.OneHot(10) # num_classes is input argument
>>> data = data.map(input_column_names="label", operation=one_hot_encode)
>>> data = data.map(operation=one_hot_encode, input_column_names="label")
>>> data = data.batch(batch_size=10, drop_remainder=True)
>>>
>>> # Use case 1: to/from json file
@ -254,7 +255,7 @@ def create_node(node):
pyobj = None
# Find a matching Dataset class and call the constructor with the corresponding args.
# When a new Dataset class is introduced, another if clause and parsing code needs to be added.
if dataset_op == 'ImageFolderDatasetV2':
if dataset_op == 'ImageFolderDataset':
sampler = construct_sampler(node.get('sampler'))
pyobj = pyclass(node['dataset_dir'], node.get('num_samples'), node.get('num_parallel_workers'),
node.get('shuffle'), sampler, node.get('extensions'),
@ -336,8 +337,8 @@ def create_node(node):
elif dataset_op == 'MapDataset':
tensor_ops = construct_tensor_ops(node.get('operations'))
pyobj = de.Dataset().map(node.get('input_columns'), tensor_ops, node.get('output_columns'),
node.get('columns_order'), node.get('num_parallel_workers'))
pyobj = de.Dataset().map(tensor_ops, node.get('input_columns'), node.get('output_columns'),
node.get('column_order'), node.get('num_parallel_workers'))
elif dataset_op == 'ShuffleDataset':
pyobj = de.Dataset().shuffle(node.get('buffer_size'))

@ -35,8 +35,8 @@ from . import cache_client
from .. import callback
def check_imagefolderdatasetv2(method):
"""A wrapper that wraps a parameter checker around the original Dataset(ImageFolderDatasetV2)."""
def check_imagefolderdataset(method):
"""A wrapper that wraps a parameter checker around the original Dataset(ImageFolderDataset)."""
@wraps(method)
def new_method(self, *args, **kwargs):
@ -474,8 +474,8 @@ def check_batch(method):
@wraps(method)
def new_method(self, *args, **kwargs):
[batch_size, drop_remainder, num_parallel_workers, per_batch_map,
input_columns, pad_info], param_dict = parse_user_args(method, *args, **kwargs)
[batch_size, drop_remainder, num_parallel_workers, per_batch_map, input_columns, output_columns,
column_order, pad_info], param_dict = parse_user_args(method, *args, **kwargs)
if not (isinstance(batch_size, int) or (callable(batch_size))):
raise TypeError("batch_size should either be an int or a callable.")
@ -510,6 +510,12 @@ def check_batch(method):
if len(input_columns) != (len(ins.signature(per_batch_map).parameters) - 1):
raise ValueError("the signature of per_batch_map should match with input columns")
if output_columns is not None:
raise ValueError("output_columns is currently not implemented.")
if column_order is not None:
raise ValueError("column_order is currently not implemented.")
return method(self, *args, **kwargs)
return new_method
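
For reference, a hedged sketch of what the reworked batch() checker above now expects: per_batch_map takes one parameter per entry in input_columns plus a trailing BatchInfo, while output_columns and column_order are parsed but still rejected as unimplemented. The dataset object and column name here are placeholders.

import numpy as np

def relabel(labels, batch_info):
    # one parameter per input column, plus BatchInfo as the last parameter
    return ([np.array(l) for l in labels],)

data = data.batch(batch_size=10, drop_remainder=True,
                  per_batch_map=relabel, input_columns=["label"])
# passing output_columns=[...] or column_order=[...] here would raise
# ValueError("... is currently not implemented.") under the new checks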
@ -551,14 +557,14 @@ def check_map(method):
@wraps(method)
def new_method(self, *args, **kwargs):
[input_columns, _, output_columns, columns_order, num_parallel_workers, python_multiprocessing, cache,
[_, input_columns, output_columns, column_order, num_parallel_workers, python_multiprocessing, cache,
callbacks], _ = \
parse_user_args(method, *args, **kwargs)
nreq_param_columns = ['input_columns', 'output_columns', 'columns_order']
nreq_param_columns = ['input_columns', 'output_columns', 'column_order']
if columns_order is not None:
type_check(columns_order, (list,), "columns_order")
if column_order is not None:
type_check(column_order, (list,), "column_order")
if num_parallel_workers is not None:
check_num_parallel_workers(num_parallel_workers)
type_check(python_multiprocessing, (bool,), "python_multiprocessing")
@ -571,7 +577,7 @@ def check_map(method):
else:
type_check(callbacks, (callback.DSCallback,), "callbacks")
for param_name, param in zip(nreq_param_columns, [input_columns, output_columns, columns_order]):
for param_name, param in zip(nreq_param_columns, [input_columns, output_columns, column_order]):
if param is not None:
check_columns(param, param_name)
if callbacks is not None:

@ -103,7 +103,6 @@ class SlidingWindow(cde.SlidingWindowOp):
super().__init__(width, axis)
class Ngram(cde.NgramOp):
"""
TensorOp to generate n-gram from a 1-D string Tensor.
@ -161,8 +160,9 @@ class JiebaTokenizer(cde.JiebaTokenizerOp):
>>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_jieba_init
@ -281,7 +281,7 @@ class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp):
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeCharTokenizer(True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
"""
@check_with_offsets
@ -312,7 +312,7 @@ class WordpieceTokenizer(cde.WordpieceTokenizerOp):
>>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token=['UNK'],
>>> max_bytes_per_token=100, with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
"""
@check_wordpiece_tokenizer
@ -377,7 +377,7 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WhitespaceTokenizer(True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
"""
@check_with_offsets
@ -403,7 +403,7 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
"""
@check_unicode_script_tokenizer
@ -496,7 +496,7 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
"""
@check_regex_tokenizer
@ -539,7 +539,7 @@ if platform.system().lower() != 'windows':
>>> preserve_unused_token=True,
>>> with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
"""
@check_basic_tokenizer
@ -592,7 +592,7 @@ if platform.system().lower() != 'windows':
>>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
>>> with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
"""
@check_bert_tokenizer

@ -16,6 +16,6 @@ This module is to support common augmentations. C_transforms is a high performan
image augmentation module which is developed with C++ OpenCV. Py_transforms
provide more kinds of image augmentations which is developed with Python PIL.
"""
from . import vision
from .. import vision
from . import c_transforms
from . import py_transforms

@ -229,8 +229,8 @@ class Duplicate(cde.DuplicateOp):
>>> # +---------+
>>> # | [1,2,3] |
>>> # +---------+
>>> data = data.map(input_columns=["x"], operations=Duplicate(),
>>> output_columns=["x", "y"], columns_order=["x", "y"])
>>> data = data.map(operations=Duplicate(), input_columns=["x"],
>>> output_columns=["x", "y"], column_order=["x", "y"])
>>> # Data after
>>> # | x | y |
>>> # +---------+---------+

@ -17,9 +17,8 @@
This module py_transforms is implemented basing on Python. It provides common
operations including OneHotOp.
"""
from .validators import check_one_hot_op
from .vision import py_transforms_util as util
from .validators import check_one_hot_op, check_compose_list
from . import py_transforms_util as util
class OneHotOp:
@ -48,3 +47,48 @@ class OneHotOp:
label (numpy.ndarray), label after being Smoothed.
"""
return util.one_hot_encoding(label, self.num_classes, self.smoothing_rate)
class Compose:
"""
Compose a list of transforms.
.. Note::
Compose takes a list of transformations either provided in py_transforms or from user-defined implementation;
each can be an initialized transformation class or a lambda function, as long as the output from the last
transformation is a single tensor of type numpy.ndarray. See below for an example of how to use Compose
with py_transforms classes and check out FiveCrop or TenCrop for the use of them in conjunction with lambda
functions.
Args:
transforms (list): List of transformations to be applied.
Examples:
>>> import mindspore.dataset as ds
>>> import mindspore.dataset.vision.py_transforms as py_transforms
>>> from mindspore.dataset.transforms.py_transforms import Compose
>>> dataset_dir = "path/to/imagefolder_directory"
>>> # create a dataset that reads all files in dataset_dir with 8 threads
>>> dataset = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8)
>>> # create a list of transformations to be applied to the image data
>>> transform = Compose([py_transforms.Decode(),
>>> py_transforms.RandomHorizontalFlip(0.5),
>>> py_transforms.ToTensor(),
>>> py_transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.243, 0.262)),
>>> py_transforms.RandomErasing()])
>>> # apply the transform to the dataset through dataset.map()
>>> dataset = dataset.map(operations=transform, input_columns="image")
"""
@check_compose_list
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, img):
"""
Call method.
Returns:
lambda function, Lambda function that takes in an img to apply transformations on.
"""
return util.compose(img, self.transforms)
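
The Compose docstring above points at FiveCrop and TenCrop for the lambda case; a small sketch of that pattern, assuming the FiveCrop and ToTensor transforms it references:

import numpy as np
import mindspore.dataset.vision.py_transforms as py_vision
from mindspore.dataset.transforms.py_transforms import Compose

transform = Compose([py_vision.Decode(),
                     py_vision.FiveCrop(64),
                     # FiveCrop yields a tuple of PIL images; the lambda converts and stacks them
                     lambda crops: np.stack([py_vision.ToTensor()(crop) for crop in crops])])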

@ -0,0 +1,65 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Built-in py_transforms_utils functions.
"""
import numpy as np
from ..core.py_util_helpers import is_numpy
def compose(img, transforms):
"""
Compose a list of transforms and apply on the image.
Args:
img (numpy.ndarray): An image in Numpy ndarray.
transforms (list): A list of transform Class objects to be composed.
Returns:
img (numpy.ndarray), An augmented image in Numpy ndarray.
"""
if is_numpy(img):
for transform in transforms:
img = transform(img)
if is_numpy(img):
return img
raise TypeError('img should be Numpy ndarray. Got {}. Append ToTensor() to transforms'.format(type(img)))
raise TypeError('img should be Numpy ndarray. Got {}.'.format(type(img)))
def one_hot_encoding(label, num_classes, epsilon):
"""
Apply label smoothing transformation to the input label, and make label be more smoothing and continuous.
Args:
label (numpy.ndarray): label to be applied label smoothing.
num_classes (int): Num class of object in dataset, value should over 0.
epsilon (float): The adjustable Hyper parameter. Default is 0.0.
Returns:
img (numpy.ndarray), label after being one hot encoded and done label smoothed.
"""
if label > num_classes:
raise ValueError('the num_classes is smaller than the category number.')
num_elements = label.size
one_hot_label = np.zeros((num_elements, num_classes), dtype=int)
if isinstance(label, list) is False:
label = [label]
for index in range(num_elements):
one_hot_label[index, label[index]] = 1
return (1 - epsilon) * one_hot_label + epsilon / num_classes
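
As a worked check of the smoothing formula in one_hot_encoding above: for class index 2 out of 5 classes with epsilon=0.1, the one-hot vector [0, 0, 1, 0, 0] becomes 0.9 * [0, 0, 1, 0, 0] + 0.1 / 5, i.e. [0.02, 0.02, 0.92, 0.02, 0.02]. A minimal sketch, with the import path assumed from this commit's file move:

import numpy as np
from mindspore.dataset.transforms.py_transforms_util import one_hot_encoding  # path assumed

smoothed = one_hot_encoding(np.array(2), num_classes=5, epsilon=0.1)
# -> [[0.02, 0.02, 0.92, 0.02, 0.02]]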

@ -200,3 +200,19 @@ def check_random_transform_ops(method):
return method(self, *args, **kwargs)
return new_method
def check_compose_list(method):
"""Wrapper method to check the transform list of Compose."""
@wraps(method)
def new_method(self, *args, **kwargs):
[transforms], _ = parse_user_args(method, *args, **kwargs)
type_check(transforms, (list,), transforms)
if not transforms:
raise ValueError("transforms list is empty.")
return method(self, *args, **kwargs)
return new_method

@ -25,11 +25,12 @@ to improve their training models.
Examples:
>>> import mindspore.dataset as ds
>>> import mindspore.dataset.transforms.c_transforms as c_transforms
>>> import mindspore.dataset.transforms.vision.c_transforms as c_vision
>>> import mindspore.dataset.vision.c_transforms as c_vision
>>> from mindspore.dataset.transforms.vision.utils import Border, ImageBatchFormat, Inter
>>> dataset_dir = "path/to/imagefolder_directory"
>>> # create a dataset that reads all files in dataset_dir with 8 threads
>>> data1 = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8)
>>> data1 = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8)
>>> # create a list of transformations to be applied to the image data
>>> transforms_list = [c_vision.Decode(),
>>> c_vision.Resize((256, 256)),
@ -1095,7 +1096,7 @@ class UniformAugment(cde.UniformAugOp):
num_ops (int, optional): Number of operations to be selected and applied (default=2).
Examples:
>>> import mindspore.dataset.transforms.vision.py_transforms as py_vision
>>> import mindspore.dataset.vision.py_transforms as py_vision
>>> transforms_list = [c_vision.RandomHorizontalFlip(),
>>> c_vision.RandomVerticalFlip(),
>>> c_vision.RandomColorAdjust(),

@ -24,6 +24,7 @@ import numpy as np
from PIL import Image, ImageOps, ImageEnhance, __version__
from .utils import Inter
from ..core.py_util_helpers import is_numpy
augment_error_message = 'img should be PIL image. Got {}. Use Decode() for encoded data or ToPIL() for decoded data.'
@ -41,39 +42,6 @@ def is_pil(img):
return isinstance(img, Image.Image)
def is_numpy(img):
"""
Check if the input image is NumPy format.
Args:
img: Image to be checked.
Returns:
Bool, True if input is NumPy image.
"""
return isinstance(img, np.ndarray)
def compose(img, transforms):
"""
Compose a list of transforms and apply on the image.
Args:
img (numpy.ndarray): An image in NumPy ndarray.
transforms (list): A list of transform Class objects to be composed.
Returns:
img (numpy.ndarray), An augmented image in NumPy ndarray.
"""
if is_numpy(img):
for transform in transforms:
img = transform(img)
if is_numpy(img):
return img
raise TypeError('img should be NumPy ndarray. Got {}. Append ToTensor() to transforms'.format(type(img)))
raise TypeError('img should be NumPy ndarray. Got {}.'.format(type(img)))
def normalize(img, mean, std):
"""
Normalize the image between [0, 1] with respect to mean and standard deviation.
@ -1221,32 +1189,6 @@ def random_affine(img, angle, translations, scale, shear, resample, fill_value=0
return img.transform(output_size, Image.AFFINE, matrix, resample, **kwargs)
def one_hot_encoding(label, num_classes, epsilon):
"""
Apply label smoothing transformation to the input label, and make label be more smoothing and continuous.
Args:
label (numpy.ndarray): label to be applied label smoothing.
num_classes (int): Num class of object in dataset, value should over 0.
epsilon (float): The adjustable Hyper parameter. Default is 0.0.
Returns:
img (numpy.ndarray), label after being one hot encoded and done label smoothed.
"""
if label > num_classes:
raise ValueError('the num_classes is smaller than the category number.')
num_elements = label.size
one_hot_label = np.zeros((num_elements, num_classes), dtype=int)
if isinstance(label, list) is False:
label = [label]
for index in range(num_elements):
one_hot_label[index, label[index]] = 1
return (1 - epsilon) * one_hot_label + epsilon / num_classes
def mix_up_single(batch_size, img, label, alpha=0.2):
"""
Apply mix up transformation to image and label in single batch internal, One hot encoding should done before this.

@ -19,10 +19,10 @@ from functools import wraps
import numpy as np
from mindspore._c_dataengine import TensorOp
from .utils import Inter, Border, ImageBatchFormat
from ...core.validator_helpers import check_value, check_uint8, FLOAT_MAX_INTEGER, check_pos_float32, \
from mindspore.dataset.core.validator_helpers import check_value, check_uint8, FLOAT_MAX_INTEGER, check_pos_float32, \
check_2tuple, check_range, check_positive, INT32_MAX, parse_user_args, type_check, type_check_list, \
check_tensor_op, UINT8_MAX, check_value_normalize_std
from .utils import Inter, Border, ImageBatchFormat
def check_crop_size(size):
@ -678,21 +678,6 @@ def check_positive_degrees(method):
return new_method
def check_compose_list(method):
"""Wrapper method to check the transform list of ComposeOp."""
@wraps(method)
def new_method(self, *args, **kwargs):
[transforms], _ = parse_user_args(method, *args, **kwargs)
type_check(transforms, (list,), transforms)
if not transforms:
raise ValueError("transforms list is empty.")
return method(self, *args, **kwargs)
return new_method
def check_random_select_subpolicy_op(method):
"""Wrapper method to check the parameters of RandomSelectSubpolicyOp."""

@ -727,7 +727,7 @@ class SummaryCollector(Callback):
Get dataset path of MindDataset object.
Args:
output_dataset (Union[Dataset, ImageFolderDatasetV2, MnistDataset, Cifar10Dataset, Cifar100Dataset,
output_dataset (Union[Dataset, ImageFolderDataset, MnistDataset, Cifar10Dataset, Cifar100Dataset,
VOCDataset, CelebADataset, MindDataset, ManifestDataset, TFRecordDataset, TextFileDataset]):
Refer to mindspore.dataset.Dataset.
@ -738,7 +738,7 @@ class SummaryCollector(Callback):
IndexError: it means get dataset path failed.
"""
dataset_package = import_module('mindspore.dataset')
dataset_dir_set = (dataset_package.ImageFolderDatasetV2, dataset_package.MnistDataset,
dataset_dir_set = (dataset_package.ImageFolderDataset, dataset_package.MnistDataset,
dataset_package.Cifar10Dataset, dataset_package.Cifar100Dataset,
dataset_package.VOCDataset, dataset_package.CelebADataset)
dataset_file_set = (dataset_package.MindDataset, dataset_package.ManifestDataset)

@ -449,7 +449,7 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
if is_training:
ds = ds.map(input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
columns_order=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers)
flip = (np.random.rand() < config.flip_ratio)
@ -467,7 +467,7 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
else:
ds = ds.map(input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
columns_order=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
operations=compose_map_func,
num_parallel_workers=num_parallel_workers)

Some files were not shown because too many files have changed in this diff.
