From 6fd58dc580000118bd244974205c91be08c087fc Mon Sep 17 00:00:00 2001 From: alex-yuyue Date: Fri, 29 Jan 2021 09:21:06 -0500 Subject: [PATCH] Add call for decoupled image and text ops Signed-off-by: alex-yuyue --- .../ccsrc/minddata/dataset/api/execute.cc | 25 +- .../dataset/include/execute_binding.cc | 22 +- .../ccsrc/minddata/dataset/include/execute.h | 13 +- mindspore/dataset/text/transforms.py | 764 +++++----- mindspore/dataset/transforms/c_transforms.py | 12 +- mindspore/dataset/transforms/validators.py | 20 +- mindspore/dataset/vision/c_transforms.py | 1327 ++++++++--------- tests/ut/python/dataset/test_HWC2CHW.py | 17 +- tests/ut/python/dataset/test_compose.py | 10 +- tests/ut/python/dataset/test_invert.py | 18 +- .../dataset/test_random_crop_and_resize.py | 19 +- .../dataset/test_text_jieba_tokenizer.py | 21 +- .../ut/python/dataset/test_uniform_augment.py | 21 +- tests/ut/python/dataset/test_vocab.py | 14 +- 14 files changed, 1207 insertions(+), 1096 deletions(-) diff --git a/mindspore/ccsrc/minddata/dataset/api/execute.cc b/mindspore/ccsrc/minddata/dataset/api/execute.cc index c1bbb9b939..bfa0566956 100644 --- a/mindspore/ccsrc/minddata/dataset/api/execute.cc +++ b/mindspore/ccsrc/minddata/dataset/api/execute.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,10 +14,11 @@ * limitations under the License. */ -#include "minddata/dataset/include/execute.h" +#include "minddata/dataset/core/tensor_row.h" #ifdef ENABLE_ANDROID #include "minddata/dataset/include/de_tensor.h" #endif +#include "minddata/dataset/include/execute.h" #include "minddata/dataset/include/tensor.h" #include "minddata/dataset/kernels/tensor_op.h" #ifndef ENABLE_ANDROID @@ -84,5 +85,25 @@ std::shared_ptr Execute::operator()(std::shared_ptr> &input_tensor_list, + std::vector> *output_tensor_list) { + CHECK_FAIL_RETURN_UNEXPECTED(op_ != nullptr, "Input TensorOperation is not valid"); + CHECK_FAIL_RETURN_UNEXPECTED(!input_tensor_list.empty(), "Input Tensor is not valid"); + + TensorRow input, output; + std::copy(input_tensor_list.begin(), input_tensor_list.end(), std::back_inserter(input)); + CHECK_FAIL_RETURN_UNEXPECTED(!input.empty(), "Input Tensor is not valid"); + + std::shared_ptr transform = op_->Build(); + Status rc = transform->Compute(input, &output); + if (rc.IsError()) { + // execution failed + RETURN_STATUS_UNEXPECTED("Operation execution failed : " + rc.ToString()); + } + + std::copy(output.begin(), output.end(), std::back_inserter(*output_tensor_list)); + return Status::OK(); +} + } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/execute_binding.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/execute_binding.cc index cc0510951f..56dc7d793e 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/execute_binding.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/execute_binding.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,14 +28,26 @@ PYBIND_REGISTER(Execute, 0, ([](const py::module *m) { auto execute = std::make_shared(toTensorOperation(operation)); return execute; })) - .def("__call__", [](Execute &self, std::shared_ptr in) { - std::shared_ptr out = self(in); - if (out == nullptr) { + .def("__call__", + [](Execute &self, std::shared_ptr in) { + std::shared_ptr out = self(in); + if (out == nullptr) { + THROW_IF_ERROR([]() { + RETURN_STATUS_UNEXPECTED( + "Failed to execute op in eager mode, please check ERROR log above."); + }()); + } + return out; + }) + .def("__call__", [](Execute &self, const std::vector> &input_tensor_list) { + std::vector> output_tensor_list; + THROW_IF_ERROR(self(input_tensor_list, &output_tensor_list)); + if (output_tensor_list.empty()) { THROW_IF_ERROR([]() { RETURN_STATUS_UNEXPECTED("Failed to execute op in eager mode, please check ERROR log above."); }()); } - return out; + return output_tensor_list; }); })); } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/include/execute.h b/mindspore/ccsrc/minddata/dataset/include/execute.h index 87772907e8..d4320e43e3 100644 --- a/mindspore/ccsrc/minddata/dataset/include/execute.h +++ b/mindspore/ccsrc/minddata/dataset/include/execute.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,16 +43,23 @@ class Execute { #ifdef ENABLE_ANDROID /// \brief callable function to execute the TensorOperation in eager mode - /// \param[inout] input - the tensor to be transformed + /// \param[in] input - the tensor to be transformed /// \return - the output tensor, nullptr if Compute fails std::shared_ptr operator()(std::shared_ptr input); #endif /// \brief callable function to execute the TensorOperation in eager mode - /// \param[inout] input - the tensor to be transformed + /// \param[in] input - the tensor to be transformed /// \return - the output tensor, nullptr if Compute fails std::shared_ptr operator()(std::shared_ptr input); + /// \brief callable function to execute the TensorOperation in eager mode + /// \param[in] input_tensor_list - the tensor to be transformed + /// \param[out] out - the result tensor after transform + /// \return - Status + Status operator()(const std::vector> &input_tensor_list, + std::vector> *out); + private: std::shared_ptr op_; }; diff --git a/mindspore/dataset/text/transforms.py b/mindspore/dataset/text/transforms.py index 358c9a7a29..858a8f576a 100644 --- a/mindspore/dataset/text/transforms.py +++ b/mindspore/dataset/text/transforms.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -59,112 +59,37 @@ from .validators import check_lookup, check_jieba_add_dict, \ check_to_number, check_bert_tokenizer, check_python_tokenizer, check_slidingwindow from ..core.datatypes import mstype_to_detype from ..core.validator_helpers import replace_none +from ..transforms.c_transforms import TensorOperation -class TextTensorOperation: - def parse(self): - raise NotImplementedError("TextTensorOperation has to implement parse method.") - -class Lookup(TextTensorOperation): - """ - Lookup operator that looks up a word to an id. - - Args: - vocab (Vocab): A vocabulary object. 
- unknown_token (str, optional): Word used for lookup if the word being looked up is out-of-vocabulary (OOV). - If unknown_token is OOV, a runtime error will be thrown (default=None). - data_type (mindspore.dtype, optional): mindspore.dtype that lookup maps string to (default=mstype.int32) - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> # Load vocabulary from list - >>> vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您']) - >>> # Use Lookup operator to map tokens to ids - >>> lookup = text.Lookup(vocab) - >>> data1 = data1.map(operations=[lookup]) - """ - - @check_lookup - def __init__(self, vocab, unknown_token=None, data_type=mstype.int32): - self.vocab = vocab - self.unknown_token = replace_none(unknown_token, '') - self.data_type = data_type - - def parse(self): - return cde.LookupOperation(self.vocab, self.unknown_token, str(mstype_to_detype(self.data_type))) - - -class SlidingWindow(TextTensorOperation): - """ - TensorOp to construct a tensor from data (only 1-D for now), where each element in the dimension axis - is a slice of data starting at the corresponding position, with a specified width. - - Args: - width (int): The width of the window. It must be an integer and greater than zero. - axis (int, optional): The axis along which the sliding window is computed (default=0). - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> # Data before - >>> # | col1 | - >>> # +-------------+ - >>> # | [1,2,3,4,5] | - >>> # +-------------+ - >>> data1 = data1.map(operations=text.SlidingWindow(3, 0)) - >>> # Data after - >>> # | col1 | - >>> # +-------------+ - >>> # | [[1,2,3], | - >>> # | [2,3,4], | - >>> # | [3,4,5]] | - >>> # +--------------+ - """ - - @check_slidingwindow - def __init__(self, width, axis=0): - self.width = width - self.axis = axis - - def parse(self): - return cde.SlidingWindowOperation(self.width, self.axis) - -class Ngram(TextTensorOperation): +class TextTensorOperation(TensorOperation): """ - TensorOp to generate n-gram from a 1-D string Tensor. - - Refer to https://en.wikipedia.org/wiki/N-gram#Examples for an overview of what n-gram is and how it works. - - Args: - n (list[int]): n in n-gram, n >= 1. n is a list of positive integers. For example, if n=[4,3], then the result - would be a 4-gram followed by a 3-gram in the same tensor. If the number of words is not enough to make up - for a n-gram, an empty string will be returned. For example, 3 grams on ["mindspore","best"] will result in - an empty string produced. - left_pad (tuple, optional): ("pad_token", pad_width). Padding performed on left side of the sequence. pad_width - will be capped at n-1. left_pad=("_",2) would pad left side of the sequence with "__" (default=None). - right_pad (tuple, optional): ("pad_token", pad_width). Padding performed on right side of the sequence. - pad_width will be capped at n-1. right_pad=("-":2) would pad right side of the sequence with "--" - (default=None). - separator (str, optional): symbol used to join strings together. For example. if 2-gram is - ["mindspore", "amazing"] with separator="-", the result would be ["mindspore-amazing"] - (default=None, which means whitespace is used). 
- - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> data1 = data1.map(operations=text.Ngram(3, separator=" ")) + Base class of Text Tensor Ops """ - - @check_ngram - def __init__(self, n, left_pad=("", 0), right_pad=("", 0), separator=" "): - self.ngrams = n - self.left_pad = left_pad - self.right_pad = right_pad - self.separator = separator + def __call__(self, input_tensor): + if not isinstance(input_tensor, list): + input_list = [input_tensor] + else: + input_list = input_tensor + tensor_list = [] + for tensor in input_list: + if not isinstance(tensor, str): + raise TypeError("Input should be string or list of strings, got {}.".format(type(tensor))) + tensor_list.append(cde.Tensor(np.asarray(tensor))) + callable_op = cde.Execute(self.parse()) + output_list = callable_op(tensor_list) + for i, element in enumerate(output_list): + arr = element.as_array() + if arr.dtype.char == 'S': + output_list[i] = to_str(arr) + else: + output_list[i] = arr + if not isinstance(input_tensor, list) and len(output_list) == 1: + output_list = output_list[0] + return output_list def parse(self): - return cde.NgramOperation(self.ngrams, self.left_pad, self.right_pad, self.separator) + raise NotImplementedError("TextTensorOperation has to implement parse() method.") DE_C_INTER_JIEBA_MODE = { @@ -174,6 +99,18 @@ DE_C_INTER_JIEBA_MODE = { } +DE_C_INTER_SENTENCEPIECE_LOADTYPE = { + SPieceTokenizerLoadType.FILE: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KFILE, + SPieceTokenizerLoadType.MODEL: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KMODEL +} + + +DE_C_INTER_SENTENCEPIECE_OUTTYPE = { + SPieceTokenizerOutType.STRING: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KString, + SPieceTokenizerOutType.INT: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KINT +} + + class JiebaTokenizer(TextTensorOperation): """ Tokenize Chinese string into words based on dictionary. @@ -335,85 +272,71 @@ class JiebaTokenizer(TextTensorOperation): " jieba mode file {} is not exist.".format(model_path)) -class UnicodeCharTokenizer(TextTensorOperation): +class Lookup(TextTensorOperation): """ - Tokenize a scalar tensor of UTF-8 string to Unicode characters. + Lookup operator that looks up a word to an id. Args: - with_offsets (bool, optional): If or not output offsets of tokens (default=False). + vocab (Vocab): A vocabulary object. + unknown_token (str, optional): Word used for lookup if the word being looked up is out-of-vocabulary (OOV). + If unknown_token is OOV, a runtime error will be thrown (default=None). 
+ data_type (mindspore.dtype, optional): mindspore.dtype that lookup maps string to (default=mstype.int32) Examples: >>> import mindspore.dataset.text as text >>> - >>> # If with_offsets=False, default output one column {["text", dtype=str]} - >>> tokenizer_op = text.UnicodeCharTokenizer() - >>> data1 = data1.map(operations=tokenizer_op) - >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32], - >>> # ["offsets_limit", dtype=uint32]} - >>> tokenizer_op = text.UnicodeCharTokenizer(True) - >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], - >>> output_columns=["token", "offsets_start", "offsets_limit"], - >>> column_order=["token", "offsets_start", "offsets_limit"]) + >>> # Load vocabulary from list + >>> vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您']) + >>> # Use Lookup operator to map tokens to ids + >>> lookup = text.Lookup(vocab) + >>> data1 = data1.map(operations=[lookup]) """ - @check_with_offsets - def __init__(self, with_offsets=False): - self.with_offsets = with_offsets + @check_lookup + def __init__(self, vocab, unknown_token=None, data_type=mstype.int32): + self.vocab = vocab + self.unknown_token = replace_none(unknown_token, '') + self.data_type = data_type def parse(self): - return cde.UnicodeCharTokenizerOperation(self.with_offsets) + return cde.LookupOperation(self.vocab, self.unknown_token, str(mstype_to_detype(self.data_type))) -# TODO(alexyuyue): Need to decouple WordpieceTokenizerOp to WordpieceTokenizerOperation after it's supported in C++ -class WordpieceTokenizer(cde.WordpieceTokenizerOp): +class Ngram(TextTensorOperation): """ - Tokenize scalar token or 1-D tokens to 1-D subword tokens. + TensorOp to generate n-gram from a 1-D string Tensor. + + Refer to https://en.wikipedia.org/wiki/N-gram#Examples for an overview of what n-gram is and how it works. Args: - vocab (Vocab): A vocabulary object. - suffix_indicator (str, optional): Used to show that the subword is the last part of a word (default='##'). - max_bytes_per_token (int, optional): Tokens exceeding this length will not be further split (default=100). - unknown_token (str, optional): When a token cannot be found: if 'unknown_token' is empty string, - return the token directly, else return 'unknown_token' (default='[UNK]'). - with_offsets (bool, optional): If or not output offsets of tokens (default=False). + n (list[int]): n in n-gram, n >= 1. n is a list of positive integers. For example, if n=[4,3], then the result + would be a 4-gram followed by a 3-gram in the same tensor. If the number of words is not enough to make up + for a n-gram, an empty string will be returned. For example, 3 grams on ["mindspore","best"] will result in + an empty string produced. + left_pad (tuple, optional): ("pad_token", pad_width). Padding performed on left side of the sequence. pad_width + will be capped at n-1. left_pad=("_",2) would pad left side of the sequence with "__" (default=None). + right_pad (tuple, optional): ("pad_token", pad_width). Padding performed on right side of the sequence. + pad_width will be capped at n-1. right_pad=("-":2) would pad right side of the sequence with "--" + (default=None). + separator (str, optional): symbol used to join strings together. For example. if 2-gram is + ["mindspore", "amazing"] with separator="-", the result would be ["mindspore-amazing"] + (default=None, which means whitespace is used). 
Examples: >>> import mindspore.dataset.text as text >>> - >>> # If with_offsets=False, default output one column {["text", dtype=str]} - >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]', - >>> max_bytes_per_token=100, with_offsets=False) - >>> data1 = data1.map(operations=tokenizer_op) - >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32], - >>> # ["offsets_limit", dtype=uint32]} - >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]', - >>> max_bytes_per_token=100, with_offsets=True) - >>> data2 = data2.map(operations=tokenizer_op, - >>> input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], - >>> column_order=["token", "offsets_start", "offsets_limit"]) + >>> data1 = data1.map(operations=text.Ngram(3, separator=" ")) """ - @check_wordpiece_tokenizer - def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, - unknown_token='[UNK]', with_offsets=False): - self.vocab = vocab - self.suffix_indicator = suffix_indicator - self.max_bytes_per_token = max_bytes_per_token - self.unknown_token = unknown_token - self.with_offsets = with_offsets - super().__init__(self.vocab, self.suffix_indicator, self.max_bytes_per_token, - self.unknown_token, self.with_offsets) - - -DE_C_INTER_SENTENCEPIECE_LOADTYPE = { - SPieceTokenizerLoadType.FILE: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KFILE, - SPieceTokenizerLoadType.MODEL: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KMODEL -} + @check_ngram + def __init__(self, n, left_pad=("", 0), right_pad=("", 0), separator=" "): + self.ngrams = n + self.left_pad = left_pad + self.right_pad = right_pad + self.separator = separator -DE_C_INTER_SENTENCEPIECE_OUTTYPE = { - SPieceTokenizerOutType.STRING: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KString, - SPieceTokenizerOutType.INT: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KINT -} + def parse(self): + return cde.NgramOperation(self.ngrams, self.left_pad, self.right_pad, self.separator) class SentencePieceTokenizer(TextTensorOperation): @@ -441,75 +364,336 @@ class SentencePieceTokenizer(TextTensorOperation): return cde.SentencePieceTokenizerOperation(self.mode, DE_C_INTER_SENTENCEPIECE_OUTTYPE[self.out_type]) -if platform.system().lower() != 'windows': - class WhitespaceTokenizer(TextTensorOperation): - """ - Tokenize a scalar tensor of UTF-8 string on ICU4C defined whitespaces, such as: ' ', '\\\\t', '\\\\r', '\\\\n'. +class SlidingWindow(TextTensorOperation): + """ + TensorOp to construct a tensor from data (only 1-D for now), where each element in the dimension axis + is a slice of data starting at the corresponding position, with a specified width. - Note: - WhitespaceTokenizer is not supported on Windows platform yet. + Args: + width (int): The width of the window. It must be an integer and greater than zero. + axis (int, optional): The axis along which the sliding window is computed (default=0). - Args: - with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ Examples: + >>> import mindspore.dataset.text as text + >>> + >>> # Data before + >>> # | col1 | + >>> # +-------------+ + >>> # | [1,2,3,4,5] | + >>> # +-------------+ + >>> data1 = data1.map(operations=text.SlidingWindow(3, 0)) + >>> # Data after + >>> # | col1 | + >>> # +-------------+ + >>> # | [[1,2,3], | + >>> # | [2,3,4], | + >>> # | [3,4,5]] | + >>> # +--------------+ + """ - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> # If with_offsets=False, default output one column {["text", dtype=str]} - >>> tokenizer_op = text.WhitespaceTokenizer() - >>> data1 = data1.map(operations=tokenizer_op) - >>> # If with_offsets=False, then output three columns {["token", dtype=str], - >>> # ["offsets_start", dtype=uint32], - >>> # ["offsets_limit", dtype=uint32]} - >>> tokenizer_op = text.WhitespaceTokenizer(True) - >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], - >>> output_columns=["token", "offsets_start", "offsets_limit"], - >>> column_order=["token", "offsets_start", "offsets_limit"]) - """ + @check_slidingwindow + def __init__(self, width, axis=0): + self.width = width + self.axis = axis - @check_with_offsets - def __init__(self, with_offsets=False): + def parse(self): + return cde.SlidingWindowOperation(self.width, self.axis) + + +class ToNumber(TextTensorOperation): + """ + Tensor operation to convert every element of a string tensor to a number. + + Strings are casted according to the rules specified in the following links: + https://en.cppreference.com/w/cpp/string/basic_string/stof, + https://en.cppreference.com/w/cpp/string/basic_string/stoul, + except that any strings which represent negative numbers cannot be cast to an + unsigned integer type. + + Args: + data_type (mindspore.dtype): mindspore.dtype to be casted to. Must be + a numeric type. + + Raises: + RuntimeError: If strings are invalid to cast, or are out of range after being casted. + + Examples: + >>> import mindspore.dataset.text as text + >>> import mindspore.common.dtype as mstype + >>> + >>> to_number_op = text.ToNumber(mstype.int8) + >>> data1 = data1.map(operations=to_number_op) + """ + + @check_to_number + def __init__(self, data_type): + data_type = mstype_to_detype(data_type) + self.data_type = str(data_type) + + def parse(self): + return cde.ToNumberOperation(self.data_type) + + +class TruncateSequencePair(TextTensorOperation): + """ + Truncate a pair of rank-1 tensors such that the total length is less than max_length. + + This operation takes two input tensors and returns two output Tensors. + + Args: + max_length (int): Maximum length required. + + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> # Data before + >>> # | col1 | col2 | + >>> # +---------+---------| + >>> # | [1,2,3] | [4,5] | + >>> # +---------+---------+ + >>> data1 = data1.map(operations=text.TruncateSequencePair(4)) + >>> # Data after + >>> # | col1 | col2 | + >>> # +---------+---------+ + >>> # | [1,2] | [4,5] | + >>> # +---------+---------+ + """ + + @check_pair_truncate + def __init__(self, max_length): + self.max_length = max_length + + def parse(self): + return cde.TruncateSequencePairOperation(self.max_length) + + +class UnicodeCharTokenizer(TextTensorOperation): + """ + Tokenize a scalar tensor of UTF-8 string to Unicode characters. + + Args: + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.UnicodeCharTokenizer() + >>> data1 = data1.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.UnicodeCharTokenizer(True) + >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], + >>> output_columns=["token", "offsets_start", "offsets_limit"], + >>> column_order=["token", "offsets_start", "offsets_limit"]) + """ + + @check_with_offsets + def __init__(self, with_offsets=False): + self.with_offsets = with_offsets + + def parse(self): + return cde.UnicodeCharTokenizerOperation(self.with_offsets) + + +# TODO(alexyuyue): Need to decouple WordpieceTokenizerOp to WordpieceTokenizerOperation after it's supported in C++ +class WordpieceTokenizer(cde.WordpieceTokenizerOp): + """ + Tokenize scalar token or 1-D tokens to 1-D subword tokens. + + Args: + vocab (Vocab): A vocabulary object. + suffix_indicator (str, optional): Used to show that the subword is the last part of a word (default='##'). + max_bytes_per_token (int, optional): Tokens exceeding this length will not be further split (default=100). + unknown_token (str, optional): When a token cannot be found: if 'unknown_token' is empty string, + return the token directly, else return 'unknown_token' (default='[UNK]'). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). + + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]', + >>> max_bytes_per_token=100, with_offsets=False) + >>> data1 = data1.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]', + >>> max_bytes_per_token=100, with_offsets=True) + >>> data2 = data2.map(operations=tokenizer_op, + >>> input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> column_order=["token", "offsets_start", "offsets_limit"]) + """ + + @check_wordpiece_tokenizer + def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, + unknown_token='[UNK]', with_offsets=False): + self.vocab = vocab + self.suffix_indicator = suffix_indicator + self.max_bytes_per_token = max_bytes_per_token + self.unknown_token = unknown_token + self.with_offsets = with_offsets + super().__init__(self.vocab, self.suffix_indicator, self.max_bytes_per_token, + self.unknown_token, self.with_offsets) + + +class PythonTokenizer: + """ + Callable class to be used for user-defined string tokenizer. + + Args: + tokenizer (Callable): Python function that takes a `str` and returns a list of `str` as tokens. 
+ + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> def my_tokenizer(line): + >>> return line.split() + >>> data1 = data1.map(operations=text.PythonTokenizer(my_tokenizer)) + """ + + @check_python_tokenizer + def __init__(self, tokenizer): + self.tokenizer = np.vectorize(lambda x: np.array(tokenizer(x), dtype='U'), signature='()->(n)') + + def __call__(self, in_array): + in_array = to_str(in_array) + tokens = self.tokenizer(in_array) + return tokens + +if platform.system().lower() != 'windows': + DE_C_INTER_NORMALIZE_FORM = { + NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE, + NormalizeForm.NFC: cde.NormalizeForm.DE_NORMALIZE_NFC, + NormalizeForm.NFKC: cde.NormalizeForm.DE_NORMALIZE_NFKC, + NormalizeForm.NFD: cde.NormalizeForm.DE_NORMALIZE_NFD, + NormalizeForm.NFKD: cde.NormalizeForm.DE_NORMALIZE_NFKD + } + + + class BasicTokenizer(TextTensorOperation): + """ + Tokenize a scalar tensor of UTF-8 string by specific rules. + + Note: + BasicTokenizer is not supported on Windows platform yet. + + Args: + lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation + on input text to fold the text to lower case and strip accents characters. If False, only apply + NormalizeUTF8('normalization_form' mode) operation on input text (default=False). + keep_whitespace (bool, optional): If True, the whitespace will be kept in out tokens (default=False). + normalization_form (NormalizeForm, optional): Used to specify a specific normalize mode. This is + only effective when 'lower_case' is False. See NormalizeUTF8 for details (default=NormalizeForm.NONE). + preserve_unused_token (bool, optional): If True, do not split special tokens like + '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]' (default=True). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.BasicTokenizer(lower_case=False, + >>> keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, + >>> preserve_unused_token=True, + >>> with_offsets=False) + >>> data1 = data1.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.BasicTokenizer(lower_case=False, + >>> keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, + >>> preserve_unused_token=True, + >>> with_offsets=True) + >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], + >>> output_columns=["token", "offsets_start", "offsets_limit"], + >>> column_order=["token", "offsets_start", "offsets_limit"]) + """ + + @check_basic_tokenizer + def __init__(self, lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, + preserve_unused_token=True, with_offsets=False): + if not isinstance(normalization_form, NormalizeForm): + raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.") + + self.lower_case = lower_case + self.keep_whitespace = keep_whitespace + self.normalization_form = DE_C_INTER_NORMALIZE_FORM[normalization_form] + self.preserve_unused_token = preserve_unused_token self.with_offsets = with_offsets def parse(self): - return cde.WhitespaceTokenizerOperation(self.with_offsets) + return cde.BasicTokenizerOperation(self.lower_case, self.keep_whitespace, self.normalization_form, + self.preserve_unused_token, self.with_offsets) - class UnicodeScriptTokenizer(TextTensorOperation): + class BertTokenizer(TextTensorOperation): """ - Tokenize a scalar tensor of UTF-8 string on Unicode script boundaries. + Tokenizer used for Bert text process. Note: - UnicodeScriptTokenizer is not supported on Windows platform yet. + BertTokenizer is not supported on Windows platform yet. Args: - keep_whitespace (bool, optional): If or not emit whitespace tokens (default=False). + vocab (Vocab): A vocabulary object. + suffix_indicator (str, optional): Used to show that the subword is the last part of a word (default='##'). + max_bytes_per_token (int, optional): Tokens exceeding this length will not be further split (default=100). + unknown_token (str, optional): When a token cannot be found: if 'unknown_token' is empty string, + return the token directly, else return 'unknown_token'(default='[UNK]'). + lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation + on input text to fold the text to lower case and strip accented characters. If False, only apply + NormalizeUTF8('normalization_form' mode) operation on input text (default=False). + keep_whitespace (bool, optional): If True, the whitespace will be kept in out tokens (default=False). + normalization_form (NormalizeForm, optional): Used to specify a specific normalize mode, + only effective when 'lower_case' is False. See NormalizeUTF8 for details (default='NONE'). + preserve_unused_token (bool, optional): If True, do not split special tokens like + '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]' (default=True). with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
Examples: >>> import mindspore.dataset.text as text >>> >>> # If with_offsets=False, default output one column {["text", dtype=str]} - >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=False) + >>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100, + >>> unknown_token='[UNK]', lower_case=False, keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True, + >>> with_offsets=False) >>> data1 = data1.map(operations=tokenizer_op) >>> # If with_offsets=False, then output three columns {["token", dtype=str], >>> # ["offsets_start", dtype=uint32], >>> # ["offsets_limit", dtype=uint32]} - >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=True) + >>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100, + >>> unknown_token='[UNK]', lower_case=False, keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True, + >>> with_offsets=True) >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], >>> output_columns=["token", "offsets_start", "offsets_limit"], >>> column_order=["token", "offsets_start", "offsets_limit"]) """ - @check_unicode_script_tokenizer - def __init__(self, keep_whitespace=False, with_offsets=False): - keep_whitespace = replace_none(keep_whitespace, False) - with_offsets = replace_none(with_offsets, False) + @check_bert_tokenizer + def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, unknown_token='[UNK]', + lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, + preserve_unused_token=True, with_offsets=False): + if not isinstance(normalization_form, NormalizeForm): + raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.") + + self.vocab = vocab + self.suffix_indicator = suffix_indicator + self.max_bytes_per_token = max_bytes_per_token + self.unknown_token = unknown_token + self.lower_case = lower_case self.keep_whitespace = keep_whitespace + self.normalization_form = DE_C_INTER_NORMALIZE_FORM[normalization_form] + self.preserve_unused_token = preserve_unused_token self.with_offsets = with_offsets def parse(self): - return cde.UnicodeScriptTokenizerOperation(self.keep_whitespace, self.with_offsets) + return cde.BertTokenizerOperation(self.vocab, self.suffix_indicator, self.max_bytes_per_token, + self.unknown_token, self.lower_case, self.keep_whitespace, + self.normalization_form, self.preserve_unused_token, self.with_offsets) class CaseFold(TextTensorOperation): @@ -530,15 +714,6 @@ if platform.system().lower() != 'windows': return cde.CaseFoldOperation() - DE_C_INTER_NORMALIZE_FORM = { - NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE, - NormalizeForm.NFC: cde.NormalizeForm.DE_NORMALIZE_NFC, - NormalizeForm.NFKC: cde.NormalizeForm.DE_NORMALIZE_NFKC, - NormalizeForm.NFD: cde.NormalizeForm.DE_NORMALIZE_NFD, - NormalizeForm.NFKD: cde.NormalizeForm.DE_NORMALIZE_NFKD - } - - class NormalizeUTF8(TextTensorOperation): """ Apply normalize operation on UTF-8 string tensor. @@ -651,218 +826,71 @@ if platform.system().lower() != 'windows': return cde.RegexTokenizerOperation(self.delim_pattern, self.keep_delim_pattern, self.with_offsets) - class BasicTokenizer(TextTensorOperation): + class UnicodeScriptTokenizer(TextTensorOperation): """ - Tokenize a scalar tensor of UTF-8 string by specific rules. + Tokenize a scalar tensor of UTF-8 string on Unicode script boundaries. 
Note: - BasicTokenizer is not supported on Windows platform yet. + UnicodeScriptTokenizer is not supported on Windows platform yet. Args: - lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation - on input text to fold the text to lower case and strip accents characters. If False, only apply - NormalizeUTF8('normalization_form' mode) operation on input text (default=False). - keep_whitespace (bool, optional): If True, the whitespace will be kept in out tokens (default=False). - normalization_form (NormalizeForm, optional): Used to specify a specific normalize mode. This is - only effective when 'lower_case' is False. See NormalizeUTF8 for details (default=NormalizeForm.NONE). - preserve_unused_token (bool, optional): If True, do not split special tokens like - '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]' (default=True). + keep_whitespace (bool, optional): If or not emit whitespace tokens (default=False). with_offsets (bool, optional): If or not output offsets of tokens (default=False). Examples: >>> import mindspore.dataset.text as text >>> >>> # If with_offsets=False, default output one column {["text", dtype=str]} - >>> tokenizer_op = text.BasicTokenizer(lower_case=False, - >>> keep_whitespace=False, - >>> normalization_form=NormalizeForm.NONE, - >>> preserve_unused_token=True, - >>> with_offsets=False) + >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=False) >>> data1 = data1.map(operations=tokenizer_op) >>> # If with_offsets=False, then output three columns {["token", dtype=str], >>> # ["offsets_start", dtype=uint32], >>> # ["offsets_limit", dtype=uint32]} - >>> tokenizer_op = text.BasicTokenizer(lower_case=False, - >>> keep_whitespace=False, - >>> normalization_form=NormalizeForm.NONE, - >>> preserve_unused_token=True, - >>> with_offsets=True) + >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=True) >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], >>> output_columns=["token", "offsets_start", "offsets_limit"], >>> column_order=["token", "offsets_start", "offsets_limit"]) """ - @check_basic_tokenizer - def __init__(self, lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, - preserve_unused_token=True, with_offsets=False): - if not isinstance(normalization_form, NormalizeForm): - raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.") - - self.lower_case = lower_case + @check_unicode_script_tokenizer + def __init__(self, keep_whitespace=False, with_offsets=False): + keep_whitespace = replace_none(keep_whitespace, False) + with_offsets = replace_none(with_offsets, False) self.keep_whitespace = keep_whitespace - self.normalization_form = DE_C_INTER_NORMALIZE_FORM[normalization_form] - self.preserve_unused_token = preserve_unused_token self.with_offsets = with_offsets def parse(self): - return cde.BasicTokenizerOperation(self.lower_case, self.keep_whitespace, self.normalization_form, - self.preserve_unused_token, self.with_offsets) + return cde.UnicodeScriptTokenizerOperation(self.keep_whitespace, self.with_offsets) - class BertTokenizer(TextTensorOperation): + class WhitespaceTokenizer(TextTensorOperation): """ - Tokenizer used for Bert text process. + Tokenize a scalar tensor of UTF-8 string on ICU4C defined whitespaces, such as: ' ', '\\\\t', '\\\\r', '\\\\n'. Note: - BertTokenizer is not supported on Windows platform yet. 
+ WhitespaceTokenizer is not supported on Windows platform yet. Args: - vocab (Vocab): A vocabulary object. - suffix_indicator (str, optional): Used to show that the subword is the last part of a word (default='##'). - max_bytes_per_token (int, optional): Tokens exceeding this length will not be further split (default=100). - unknown_token (str, optional): When a token cannot be found: if 'unknown_token' is empty string, - return the token directly, else return 'unknown_token'(default='[UNK]'). - lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation - on input text to fold the text to lower case and strip accented characters. If False, only apply - NormalizeUTF8('normalization_form' mode) operation on input text (default=False). - keep_whitespace (bool, optional): If True, the whitespace will be kept in out tokens (default=False). - normalization_form (NormalizeForm, optional): Used to specify a specific normalize mode, - only effective when 'lower_case' is False. See NormalizeUTF8 for details (default='NONE'). - preserve_unused_token (bool, optional): If True, do not split special tokens like - '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]' (default=True). with_offsets (bool, optional): If or not output offsets of tokens (default=False). Examples: >>> import mindspore.dataset.text as text >>> >>> # If with_offsets=False, default output one column {["text", dtype=str]} - >>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100, - >>> unknown_token='[UNK]', lower_case=False, keep_whitespace=False, - >>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True, - >>> with_offsets=False) + >>> tokenizer_op = text.WhitespaceTokenizer() >>> data1 = data1.map(operations=tokenizer_op) >>> # If with_offsets=False, then output three columns {["token", dtype=str], >>> # ["offsets_start", dtype=uint32], >>> # ["offsets_limit", dtype=uint32]} - >>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100, - >>> unknown_token='[UNK]', lower_case=False, keep_whitespace=False, - >>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True, - >>> with_offsets=True) + >>> tokenizer_op = text.WhitespaceTokenizer(True) >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], >>> output_columns=["token", "offsets_start", "offsets_limit"], >>> column_order=["token", "offsets_start", "offsets_limit"]) """ - @check_bert_tokenizer - def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, unknown_token='[UNK]', - lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, - preserve_unused_token=True, with_offsets=False): - if not isinstance(normalization_form, NormalizeForm): - raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.") - - self.vocab = vocab - self.suffix_indicator = suffix_indicator - self.max_bytes_per_token = max_bytes_per_token - self.unknown_token = unknown_token - self.lower_case = lower_case - self.keep_whitespace = keep_whitespace - self.normalization_form = DE_C_INTER_NORMALIZE_FORM[normalization_form] - self.preserve_unused_token = preserve_unused_token + @check_with_offsets + def __init__(self, with_offsets=False): self.with_offsets = with_offsets def parse(self): - return cde.BertTokenizerOperation(self.vocab, self.suffix_indicator, self.max_bytes_per_token, - self.unknown_token, self.lower_case, self.keep_whitespace, - self.normalization_form, 
self.preserve_unused_token, self.with_offsets) - - -class TruncateSequencePair(TextTensorOperation): - """ - Truncate a pair of rank-1 tensors such that the total length is less than max_length. - - This operation takes two input tensors and returns two output Tensors. - - Args: - max_length (int): Maximum length required. - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> # Data before - >>> # | col1 | col2 | - >>> # +---------+---------| - >>> # | [1,2,3] | [4,5] | - >>> # +---------+---------+ - >>> data1 = data1.map(operations=text.TruncateSequencePair(4)) - >>> # Data after - >>> # | col1 | col2 | - >>> # +---------+---------+ - >>> # | [1,2] | [4,5] | - >>> # +---------+---------+ - """ - - @check_pair_truncate - def __init__(self, max_length): - self.max_length = max_length - - def parse(self): - return cde.TruncateSequencePairOperation(self.max_length) - - -class ToNumber(TextTensorOperation): - """ - Tensor operation to convert every element of a string tensor to a number. - - Strings are casted according to the rules specified in the following links: - https://en.cppreference.com/w/cpp/string/basic_string/stof, - https://en.cppreference.com/w/cpp/string/basic_string/stoul, - except that any strings which represent negative numbers cannot be cast to an - unsigned integer type. - - Args: - data_type (mindspore.dtype): mindspore.dtype to be casted to. Must be - a numeric type. - - Raises: - RuntimeError: If strings are invalid to cast, or are out of range after being casted. - - Examples: - >>> import mindspore.dataset.text as text - >>> import mindspore.common.dtype as mstype - >>> - >>> to_number_op = text.ToNumber(mstype.int8) - >>> data1 = data1.map(operations=to_number_op) - """ - - @check_to_number - def __init__(self, data_type): - data_type = mstype_to_detype(data_type) - self.data_type = str(data_type) - - def parse(self): - return cde.ToNumberOperation(self.data_type) - - -class PythonTokenizer: - """ - Callable class to be used for user-defined string tokenizer. - - Args: - tokenizer (Callable): Python function that takes a `str` and returns a list of `str` as tokens. - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> def my_tokenizer(line): - >>> return line.split() - >>> data1 = data1.map(operations=text.PythonTokenizer(my_tokenizer)) - """ - - @check_python_tokenizer - def __init__(self, tokenizer): - self.tokenizer = np.vectorize(lambda x: np.array(tokenizer(x), dtype='U'), signature='()->(n)') - - def __call__(self, in_array): - in_array = to_str(in_array) - tokens = self.tokenizer(in_array) - return tokens + return cde.WhitespaceTokenizerOperation(self.with_offsets) diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index 2a79f2d182..ae4b8e9a94 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -26,6 +26,14 @@ from .validators import check_num_classes, check_de_type, check_fill_value, chec from ..core.datatypes import mstype_to_detype +class TensorOperation: + def __call__(self): + raise NotImplementedError("TensorOperation has to implement __call__() method.") + + def parse(self): + raise NotImplementedError("TensorOperation has to implement parse() method.") + + class OneHot(cde.OneHotOp): """ Tensor operation to apply one hot encoding. @@ -304,7 +312,7 @@ class Unique(cde.UniqueOp): Also return an index tensor that contains the index of each element of the input tensor in the Unique output tensor. - Finally, return a count tensor that constains the count of each element of + Finally, return a count tensor that contains the count of each element of the output tensor in the input tensor. Note: diff --git a/mindspore/dataset/transforms/validators.py b/mindspore/dataset/transforms/validators.py index 6072397220..f3ae5cea8a 100644 --- a/mindspore/dataset/transforms/validators.py +++ b/mindspore/dataset/transforms/validators.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -237,8 +237,8 @@ def check_compose_list(method): type_check(transforms, (list,), transforms) if not transforms: raise ValueError("transforms list is empty.") - for i, transfrom in enumerate(transforms): - if not callable(transfrom): + for i, transform in enumerate(transforms): + if not callable(transform): raise ValueError("transforms[{}] is not callable.".format(i)) return method(self, *args, **kwargs) @@ -269,9 +269,10 @@ def check_random_apply(method): [transforms, prob], _ = parse_user_args(method, *args, **kwargs) type_check(transforms, (list,), "transforms") - for i, transfrom in enumerate(transforms): - if not callable(transfrom): - raise ValueError("transforms[{}] is not callable.".format(i)) + for i, transform in enumerate(transforms): + if str(transform).find("c_transform") >= 0: + raise ValueError("transforms[{}] is not a py transforms. Should not use a c transform in py transform" \ + .format(i)) if prob is not None: type_check(prob, (float, int,), "prob") @@ -290,9 +291,10 @@ def check_transforms_list(method): [transforms], _ = parse_user_args(method, *args, **kwargs) type_check(transforms, (list,), "transforms") - for i, transfrom in enumerate(transforms): - if not callable(transfrom): - raise ValueError("transforms[{}] is not callable.".format(i)) + for i, transform in enumerate(transforms): + if str(transform).find("c_transform") >= 0: + raise ValueError("transforms[{}] is not a py transforms. Should not use a c transform in py transform" \ + .format(i)) return method(self, *args, **kwargs) return new_method diff --git a/mindspore/dataset/vision/c_transforms.py b/mindspore/dataset/vision/c_transforms.py index c83eaf618a..5496dd5c5d 100644 --- a/mindspore/dataset/vision/c_transforms.py +++ b/mindspore/dataset/vision/c_transforms.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -55,21 +55,55 @@ from .validators import check_prob, check_crop, check_resize_interpolation, chec check_bounding_box_augment_cpp, check_random_select_subpolicy_op, check_auto_contrast, check_random_affine, \ check_random_solarize, check_soft_dvpp_decode_random_crop_resize_jpeg, check_positive_degrees, FLOAT_MAX_INTEGER, \ check_cut_mix_batch_c, check_posterize +from ..transforms.c_transforms import TensorOperation + + +class ImageTensorOperation(TensorOperation): + """ + Base class of Image Tensor Ops + """ + def __call__(self, input_tensor): + if not isinstance(input_tensor, list): + input_list = [input_tensor] + else: + input_list = input_tensor + tensor_list = [] + for tensor in input_list: + if not isinstance(tensor, (np.ndarray, Image.Image)): + raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(tensor))) + tensor_list.append(cde.Tensor(np.asarray(tensor))) + callable_op = cde.Execute(self.parse()) + output_list = callable_op(tensor_list) + for i, element in enumerate(output_list): + arr = element.as_array() + if arr.dtype.char == 'S': + output_list[i] = np.char.decode(arr) + else: + output_list[i] = arr + if not isinstance(input_tensor, list) and len(output_list) == 1: + output_list = output_list[0] + return output_list + + def parse(self): + raise NotImplementedError("ImageTensorOperation has to implement parse() method.") -DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR, - Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR, - Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC, - Inter.AREA: cde.InterpolationMode.DE_INTER_AREA} DE_C_BORDER_TYPE = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT, Border.EDGE: cde.BorderType.DE_BORDER_EDGE, Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT, Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC} + DE_C_IMAGE_BATCH_FORMAT = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC, ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW} +DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR, + Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR, + Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC, + Inter.AREA: cde.InterpolationMode.DE_INTER_AREA} + + def parse_padding(padding): if isinstance(padding, numbers.Number): padding = [padding] * 4 @@ -81,9 +115,6 @@ def parse_padding(padding): padding = tuple(padding) return padding -class ImageTensorOperation: - def parse(self): - raise NotImplementedError("ImageTensorOperation has to implement parse method.") class AutoContrast(ImageTensorOperation): """ @@ -112,58 +143,122 @@ class AutoContrast(ImageTensorOperation): return cde.AutoContrastOperation(self.cutoff, self.ignore) -class RandomSharpness(ImageTensorOperation): +class BoundingBoxAugment(ImageTensorOperation): """ - Adjust the sharpness of the input image by a fixed or random degree. Degree of 0.0 gives a blurred image, - degree of 1.0 gives the original image, and degree of 2.0 gives a sharpened image. + Apply a given image transform on a random selection of bounding box regions of a given image. Args: - degrees (tuple, optional): Range of random sharpness adjustment degrees. It should be in (min, max) format. - If min=max, then it is a single fixed magnitude operation (default = (0.1, 1.9)). + transform: C++ transformation function to be applied on random selection + of bounding box regions of a given image. + ratio (float, optional): Ratio of bounding boxes to apply augmentation on. 
+ Range: [0, 1] (default=0.3). - Raises: - TypeError : If degrees is not a list or tuple. - ValueError: If degrees is negative. - ValueError: If degrees is in (max, min) format instead of (min, max). + Examples: + >>> # set bounding box operation with ratio of 1 to apply rotation on all bounding boxes + >>> bbox_aug_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1) + >>> # map to apply ops + >>> image_folder_dataset = image_folder_dataset.map(operations=[bbox_aug_op], + ... input_columns=["image", "bbox"], + ... output_columns=["image", "bbox"], + ... column_order=["image", "bbox"]) + """ + + @check_bounding_box_augment_cpp + def __init__(self, transform, ratio=0.3): + self.ratio = ratio + self.transform = transform + + def parse(self): + if self.transform and getattr(self.transform, 'parse', None): + transform = self.transform.parse() + else: + transform = self.transform + return cde.BoundingBoxAugmentOperation(transform, self.ratio) + + +class CenterCrop(ImageTensorOperation): + """ + Crops the input image at the center to the given size. + + Args: + size (Union[int, sequence]): The output size of the cropped image. + If size is an integer, a square crop of size (size, size) is returned. + If size is a sequence of length 2, it should be (height, width). Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.RandomSharpness(degrees=(0.2, 1.9))] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + >>> # crop image to a square + >>> transforms_list1 = [c_vision.Decode(), c_vision.CenterCrop(50)] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, ... input_columns=["image"]) + >>> # crop image to portrait style + >>> transforms_list2 = [c_vision.Decode(), c_vision.CenterCrop((60, 40))] + >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, + ... input_columns=["image"]) """ - @check_positive_degrees - def __init__(self, degrees=(0.1, 1.9)): - self.degrees = degrees + @check_crop + def __init__(self, size): + if isinstance(size, int): + size = (size, size) + self.size = size def parse(self): - return cde.RandomSharpnessOperation(self.degrees) + return cde.CenterCropOperation(self.size) -class Equalize(ImageTensorOperation): +class CutMixBatch(ImageTensorOperation): """ - Apply histogram equalization on input image. + Apply CutMix transformation on input batch of images and labels. + Note that you need to make labels into one-hot format and batch before calling this function. + + Args: + image_batch_format (Image Batch Format): The method of padding. Can be any of + [ImageBatchFormat.NHWC, ImageBatchFormat.NCHW] + alpha (float, optional): hyperparameter of beta distribution (default = 1.0). + prob (float, optional): The probability by which CutMix is applied to each image (default = 1.0). Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.Equalize()] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) + >>> from mindspore.dataset.vision import ImageBatchFormat + >>> onehot_op = c_transforms.OneHot(num_classes=10) + >>> image_folder_dataset= image_folder_dataset.map(operations=onehot_op, + ... input_columns=["label"]) + >>> cutmix_batch_op = c_vision.CutMixBatch(ImageBatchFormat.NHWC, 1.0, 0.5) + >>> image_folder_dataset = image_folder_dataset.batch(5) + >>> image_folder_dataset = image_folder_dataset.map(operations=cutmix_batch_op, + ... 
input_columns=["image", "label"]) """ + + @check_cut_mix_batch_c + def __init__(self, image_batch_format, alpha=1.0, prob=1.0): + self.image_batch_format = image_batch_format.value + self.alpha = alpha + self.prob = prob + def parse(self): - return cde.EqualizeOperation() + return cde.CutMixBatchOperation(DE_C_IMAGE_BATCH_FORMAT[self.image_batch_format], self.alpha, self.prob) -class Invert(ImageTensorOperation): +class CutOut(ImageTensorOperation): """ - Apply invert on input image in RGB mode. + Randomly cut (mask) out a given number of square patches from the input NumPy image array. + + Args: + length (int): The side length of each square patch. + num_patches (int, optional): Number of patches to be cut out of an image (default=1). Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.Invert()] + >>> transforms_list = [c_vision.Decode(), c_vision.CutOut(80, num_patches=10)] >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, ... input_columns=["image"]) """ + + @check_cutout + def __init__(self, length, num_patches=1): + self.length = length + self.num_patches = num_patches + def parse(self): - return cde.InvertOperation() + return cde.CutOutOperation(self.length, self.num_patches) class Decode(ImageTensorOperation): @@ -203,59 +298,47 @@ class Decode(ImageTensorOperation): return cde.DecodeOperation(self.rgb) -class CutMixBatch(ImageTensorOperation): +class Equalize(ImageTensorOperation): """ - Apply CutMix transformation on input batch of images and labels. - Note that you need to make labels into one-hot format and batch before calling this function. - - Args: - image_batch_format (Image Batch Format): The method of padding. Can be any of - [ImageBatchFormat.NHWC, ImageBatchFormat.NCHW] - alpha (float, optional): hyperparameter of beta distribution (default = 1.0). - prob (float, optional): The probability by which CutMix is applied to each image (default = 1.0). + Apply histogram equalization on input image. Examples: - >>> from mindspore.dataset.vision import ImageBatchFormat - >>> onehot_op = c_transforms.OneHot(num_classes=10) - >>> image_folder_dataset= image_folder_dataset.map(operations=onehot_op, - ... input_columns=["label"]) - >>> cutmix_batch_op = c_vision.CutMixBatch(ImageBatchFormat.NHWC, 1.0, 0.5) - >>> image_folder_dataset = image_folder_dataset.batch(5) - >>> image_folder_dataset = image_folder_dataset.map(operations=cutmix_batch_op, - ... input_columns=["image", "label"]) + >>> transforms_list = [c_vision.Decode(), c_vision.Equalize()] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) """ - - @check_cut_mix_batch_c - def __init__(self, image_batch_format, alpha=1.0, prob=1.0): - self.image_batch_format = image_batch_format.value - self.alpha = alpha - self.prob = prob - def parse(self): - return cde.CutMixBatchOperation(DE_C_IMAGE_BATCH_FORMAT[self.image_batch_format], self.alpha, self.prob) + return cde.EqualizeOperation() -class CutOut(ImageTensorOperation): +class HWC2CHW(ImageTensorOperation): """ - Randomly cut (mask) out a given number of square patches from the input NumPy image array. - - Args: - length (int): The side length of each square patch. - num_patches (int, optional): Number of patches to be cut out of an image (default=1). + Transpose the input image; shape (H, W, C) to shape (C, H, W). Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.CutOut(80, num_patches=10)] + >>> transforms_list = [c_vision.Decode(), + ... 
c_vision.RandomHorizontalFlip(0.75), + ... c_vision.RandomCrop(512), + ... c_vision.HWC2CHW()] >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, ... input_columns=["image"]) """ - @check_cutout - def __init__(self, length, num_patches=1): - self.length = length - self.num_patches = num_patches + def parse(self): + return cde.HwcToChwOperation() + +class Invert(ImageTensorOperation): + """ + Apply invert on input image in RGB mode. + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.Invert()] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ def parse(self): - return cde.CutOutOperation(self.length, self.num_patches) + return cde.InvertOperation() class MixUpBatch(ImageTensorOperation): @@ -313,22 +396,6 @@ class Normalize(ImageTensorOperation): self.mean = mean self.std = std - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy or PIL image): Image array to be normalized. - - Returns: - img (NumPy), Normalized Image array. - """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - normalize = cde.Execute(cde.NormalizeOperation(self.mean, self.std)) - img = normalize(cde.Tensor(np.asarray(img))) - return img.as_array() - def parse(self): return cde.NormalizeOperation(self.mean, self.std) @@ -360,67 +427,100 @@ class NormalizePad(ImageTensorOperation): self.std = std self.dtype = dtype - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy or PIL image): Image array to be normalizepad. - - Returns: - img (NumPy), NormalizePaded Image array. - """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - normalize_pad = cde.Execute(cde.NormalizePadOperation(self.mean, self.std, self.dtype)) - img = normalize_pad(cde.Tensor(np.asarray(img))) - return img.as_array() - def parse(self): return cde.NormalizePadOperation(self.mean, self.std, self.dtype) -class RandomAffine(ImageTensorOperation): +class Pad(ImageTensorOperation): """ - Apply Random affine transformation to the input image. + Pads the image according to padding parameters. Args: - degrees (int or float or sequence): Range of the rotation degrees. - If degrees is a number, the range will be (-degrees, degrees). - If degrees is a sequence, it should be (min, max). - translate (sequence, optional): Sequence (tx_min, tx_max, ty_min, ty_max) of minimum/maximum translation in - x(horizontal) and y(vertical) directions (default=None). - The horizontal and vertical shift is selected randomly from the range: - (tx_min*width, tx_max*width) and (ty_min*height, ty_max*height), respectively. - If a tuple or list of size 2, then a translate parallel to the X axis in the range of - (translate[0], translate[1]) is applied. - If a tuple of list of size 4, then a translate parallel to the X axis in the range of - (translate[0], translate[1]) and a translate parallel to the Y axis in the range of - (translate[2], translate[3]) are applied. - If None, no translation is applied. - scale (sequence, optional): Scaling factor interval (default=None, original scale is used). - shear (int or float or sequence, optional): Range of shear factor (default=None). - If a number, then a shear parallel to the X axis in the range of (-shear, +shear) is applied. 
- If a tuple or list of size 2, then a shear parallel to the X axis in the range of (shear[0], shear[1]) - is applied. - If a tuple of list of size 4, then a shear parallel to X axis in the range of (shear[0], shear[1]) - and a shear parallel to Y axis in the range of (shear[2], shear[3]) is applied. - If None, no shear is applied. - resample (Inter mode, optional): An optional resampling filter (default=Inter.NEAREST). - If omitted, or if the image has mode "1" or "P", it is set to be Inter.NEAREST. - It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. - - - Inter.BILINEAR, means resample method is bilinear interpolation. - - - Inter.NEAREST, means resample method is nearest-neighbor interpolation. - - - Inter.BICUBIC, means resample method is bicubic interpolation. - - fill_value (tuple or int, optional): Optional fill_value to fill the area outside the transform - in the output image. There must be three elements in tuple and the value of single element is [0, 255]. - Used only in Pillow versions > 5.0.0 (default=0, filling is performed). - - Raises: + padding (Union[int, sequence]): The number of pixels to pad the image. + If a single number is provided, it pads all borders with this value. + If a tuple or list of 2 values are provided, it pads the (left and top) + with the first value and (right and bottom) with the second value. + If 4 values are provided as a list or tuple, + it pads the left, top, right and bottom respectively. + fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for + padding_mode Border.CONSTANT (default=0). + If it is an integer, it is used for all RGB channels. + If it is a 3-tuple, it is used to fill R, G, B channels respectively. + The fill_value values must be in range [0, 255]. + padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of + [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. + + - Border.CONSTANT, means it fills the border with constant values. + + - Border.EDGE, means it pads with the last value on the edge. + + - Border.REFLECT, means it reflects the values on the edge omitting the last + value of edge. + + - Border.SYMMETRIC, means it reflects the values on the edge repeating the last + value of edge. + + Examples: + >>> from mindspore.dataset.vision import Border + >>> transforms_list = [c_vision.Decode(), c_vision.Pad([100, 100, 100, 100])] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_pad + def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT): + padding = parse_padding(padding) + if isinstance(fill_value, int): + fill_value = tuple([fill_value] * 3) + self.padding = padding + self.fill_value = fill_value + self.padding_mode = padding_mode + + def parse(self): + return cde.PadOperation(self.padding, self.fill_value, DE_C_BORDER_TYPE[self.padding_mode]) + + +class RandomAffine(ImageTensorOperation): + """ + Apply Random affine transformation to the input image. + + Args: + degrees (int or float or sequence): Range of the rotation degrees. + If degrees is a number, the range will be (-degrees, degrees). + If degrees is a sequence, it should be (min, max). + translate (sequence, optional): Sequence (tx_min, tx_max, ty_min, ty_max) of minimum/maximum translation in + x(horizontal) and y(vertical) directions (default=None). 
+ The horizontal and vertical shift is selected randomly from the range: + (tx_min*width, tx_max*width) and (ty_min*height, ty_max*height), respectively. + If a tuple or list of size 2, then a translate parallel to the X axis in the range of + (translate[0], translate[1]) is applied. + If a tuple of list of size 4, then a translate parallel to the X axis in the range of + (translate[0], translate[1]) and a translate parallel to the Y axis in the range of + (translate[2], translate[3]) are applied. + If None, no translation is applied. + scale (sequence, optional): Scaling factor interval (default=None, original scale is used). + shear (int or float or sequence, optional): Range of shear factor (default=None). + If a number, then a shear parallel to the X axis in the range of (-shear, +shear) is applied. + If a tuple or list of size 2, then a shear parallel to the X axis in the range of (shear[0], shear[1]) + is applied. + If a tuple of list of size 4, then a shear parallel to X axis in the range of (shear[0], shear[1]) + and a shear parallel to Y axis in the range of (shear[2], shear[3]) is applied. + If None, no shear is applied. + resample (Inter mode, optional): An optional resampling filter (default=Inter.NEAREST). + If omitted, or if the image has mode "1" or "P", it is set to be Inter.NEAREST. + It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. + + - Inter.BILINEAR, means resample method is bilinear interpolation. + + - Inter.NEAREST, means resample method is nearest-neighbor interpolation. + + - Inter.BICUBIC, means resample method is bicubic interpolation. + + fill_value (tuple or int, optional): Optional fill_value to fill the area outside the transform + in the output image. There must be three elements in tuple and the value of single element is [0, 255]. + Used only in Pillow versions > 5.0.0 (default=0, filling is performed). + + Raises: ValueError: If degrees is negative. ValueError: If translation value is not between -1 and 1. ValueError: If scale is not positive. @@ -486,6 +586,82 @@ class RandomAffine(ImageTensorOperation): self.fill_value) +class RandomColor(ImageTensorOperation): + """ + Adjust the color of the input image by a fixed or random degree. + This operation works only with 3-channel color images. + + Args: + degrees (sequence, optional): Range of random color adjustment degrees. + It should be in (min, max) format. If min=max, then it is a + single fixed magnitude operation (default=(0.1, 1.9)). + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.RandomColor((0.5, 2.0))] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_positive_degrees + def __init__(self, degrees=(0.1, 1.9)): + self.degrees = degrees + + def parse(self): + return cde.RandomColorOperation(*self.degrees) + + +class RandomColorAdjust(ImageTensorOperation): + """ + Randomly adjust the brightness, contrast, saturation, and hue of the input image. + + Args: + brightness (Union[float, tuple], optional): Brightness adjustment factor (default=(1, 1)). Cannot be negative. + If it is a float, the factor is uniformly chosen from the range [max(0, 1-brightness), 1+brightness]. + If it is a sequence, it should be [min, max] for the range. + contrast (Union[float, tuple], optional): Contrast adjustment factor (default=(1, 1)). Cannot be negative. + If it is a float, the factor is uniformly chosen from the range [max(0, 1-contrast), 1+contrast]. 
+ If it is a sequence, it should be [min, max] for the range. + saturation (Union[float, tuple], optional): Saturation adjustment factor (default=(1, 1)). Cannot be negative. + If it is a float, the factor is uniformly chosen from the range [max(0, 1-saturation), 1+saturation]. + If it is a sequence, it should be [min, max] for the range. + hue (Union[float, tuple], optional): Hue adjustment factor (default=(0, 0)). + If it is a float, the range will be [-hue, hue]. Value should be 0 <= hue <= 0.5. + If it is a sequence, it should be [min, max] where -0.5 <= min <= max <= 0.5. + + Examples: + >>> decode_op = c_vision.Decode() + >>> transform_op = c_vision.RandomColorAdjust(brightness=(0.5, 1), + ... contrast=(0.4, 1), + ... saturation=(0.3, 1)) + >>> transforms_list = [decode_op, transform_op] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_random_color_adjust + def __init__(self, brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)): + brightness = self.expand_values(brightness) + contrast = self.expand_values(contrast) + saturation = self.expand_values(saturation) + hue = self.expand_values(hue, center=0, bound=(-0.5, 0.5), non_negative=False) + + self.brightness = brightness + self.contrast = contrast + self.saturation = saturation + self.hue = hue + + def expand_values(self, value, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True): + if isinstance(value, numbers.Number): + value = [center - value, center + value] + if non_negative: + value[0] = max(0, value[0]) + check_range(value, bound) + return (value[0], value[1]) + + def parse(self): + return cde.RandomColorAdjustOperation(self.brightness, self.contrast, self.saturation, self.hue) + + class RandomCrop(ImageTensorOperation): """ Crop the input image at a random location. @@ -551,6 +727,58 @@ class RandomCrop(ImageTensorOperation): return cde.RandomCropOperation(self.size, self.padding, self.pad_if_needed, self.fill_value, border_type) +class RandomCropDecodeResize(ImageTensorOperation): + """ + Equivalent to RandomResizedCrop, but crops before decodes. + + Args: + size (Union[int, sequence]): The size of the output image. + If size is an integer, a square crop of size (size, size) is returned. + If size is a sequence of length 2, it should be (height, width). + scale (tuple, optional): Range [min, max) of respective size of the + original size to be cropped (default=(0.08, 1.0)). + ratio (tuple, optional): Range [min, max) of aspect ratio to be + cropped (default=(3. / 4., 4. / 3.)). + interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). + It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. + + - Inter.BILINEAR, means interpolation method is bilinear interpolation. + + - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. + + - Inter.BICUBIC, means interpolation method is bicubic interpolation. + + max_attempts (int, optional): The maximum number of attempts to propose a valid crop_area (default=10). + If exceeded, fall back to use center_crop instead. + + Examples: + >>> from mindspore.dataset.vision import Inter + >>> resize_crop_decode_op = c_vision.RandomCropDecodeResize(size=(50, 75), + ... scale=(0.25, 0.5), + ... interpolation=Inter.NEAREST, + ... max_attempts=5) + >>> transforms_list = [resize_crop_decode_op] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... 
input_columns=["image"]) + """ + + @check_random_resize_crop + def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), + interpolation=Inter.BILINEAR, max_attempts=10): + if isinstance(size, int): + size = (size, size) + self.size = size + self.scale = scale + self.ratio = ratio + self.interpolation = interpolation + self.max_attempts = max_attempts + + def parse(self): + return cde.RandomCropDecodeResizeOperation(self.size, self.scale, self.ratio, + DE_C_INTER_MODE[self.interpolation], + self.max_attempts) + + class RandomCropWithBBox(ImageTensorOperation): """ Crop the input image at a random location and adjust bounding boxes accordingly. @@ -685,174 +913,54 @@ class RandomPosterize(ImageTensorOperation): return cde.RandomPosterizeOperation(bits) -class RandomVerticalFlip(ImageTensorOperation): +class RandomResizedCrop(ImageTensorOperation): """ - Flip the input image vertically, randomly with a given probability. + Crop the input image to a random size and aspect ratio. Args: - prob (float, optional): Probability of the image being flipped (default=0.5). - - Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlip(0.25)] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_prob - def __init__(self, prob=0.5): - self.prob = prob + size (Union[int, sequence]): The size of the output image. + If size is an integer, a square crop of size (size, size) is returned. + If size is a sequence of length 2, it should be (height, width). + scale (tuple, optional): Range [min, max) of respective size of the original + size to be cropped (default=(0.08, 1.0)). + ratio (tuple, optional): Range [min, max) of aspect ratio to be cropped + (default=(3. / 4., 4. / 3.)). + interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). + It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. - def parse(self): - return cde.RandomVerticalFlipOperation(self.prob) + - Inter.BILINEAR, means interpolation method is bilinear interpolation. + - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. -class RandomVerticalFlipWithBBox(ImageTensorOperation): - """ - Flip the input image vertically, randomly with a given probability and adjust bounding boxes accordingly. + - Inter.BICUBIC, means interpolation method is bicubic interpolation. - Args: - prob (float, optional): Probability of the image being flipped (default=0.5). + max_attempts (int, optional): The maximum number of attempts to propose a valid + crop_area (default=10). If exceeded, fall back to use center_crop instead. Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlipWithBBox(0.20)] + >>> from mindspore.dataset.vision import Inter + >>> decode_op = c_vision.Decode() + >>> resize_crop_op = c_vision.RandomResizedCrop(size=(50, 75), scale=(0.25, 0.5), + ... interpolation=Inter.BILINEAR) + >>> transforms_list = [decode_op, resize_crop_op] >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, ... input_columns=["image"]) """ - @check_prob - def __init__(self, prob=0.5): - self.prob = prob - - def parse(self): - return cde.RandomVerticalFlipWithBBoxOperation(self.prob) - - -class BoundingBoxAugment(ImageTensorOperation): - """ - Apply a given image transform on a random selection of bounding box regions of a given image. 
- - Args: - transform: C++ transformation function to be applied on random selection - of bounding box regions of a given image. - ratio (float, optional): Ratio of bounding boxes to apply augmentation on. - Range: [0, 1] (default=0.3). - - Examples: - >>> # set bounding box operation with ratio of 1 to apply rotation on all bounding boxes - >>> bbox_aug_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1) - >>> # map to apply ops - >>> image_folder_dataset = image_folder_dataset.map(operations=[bbox_aug_op], - ... input_columns=["image", "bbox"], - ... output_columns=["image", "bbox"], - ... column_order=["image", "bbox"]) - """ - - @check_bounding_box_augment_cpp - def __init__(self, transform, ratio=0.3): - self.ratio = ratio - self.transform = transform - - def parse(self): - if self.transform and getattr(self.transform, 'parse', None): - transform = self.transform.parse() - else: - transform = self.transform - return cde.BoundingBoxAugmentOperation(transform, self.ratio) - - -class Resize(ImageTensorOperation): - """ - Resize the input image to the given size. - - Args: - size (Union[int, sequence]): The output size of the resized image. - If size is an integer, the smaller edge of the image will be resized to this value with - the same image aspect ratio. - If size is a sequence of length 2, it should be (height, width). - interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR). - It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC]. - - - Inter.LINEAR, means interpolation method is bilinear interpolation. - - - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. - - - Inter.BICUBIC, means interpolation method is bicubic interpolation. - - - Inter.AREA, means interpolation method is pixel area interpolation. - - Examples: - >>> from mindspore.dataset.vision import Inter - >>> decode_op = c_vision.Decode() - >>> resize_op = c_vision.Resize([100, 75], Inter.BICUBIC) - >>> transforms_list = [decode_op, resize_op] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_resize_interpolation - def __init__(self, size, interpolation=Inter.LINEAR): + @check_random_resize_crop + def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), + interpolation=Inter.BILINEAR, max_attempts=10): if isinstance(size, int): - size = (size,) - self.size = size - self.interpolation = interpolation - - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy or PIL image): Image to be resized. - - Returns: - img (NumPy), Resized image. - """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - resize = cde.Execute(cde.ResizeOperation(self.size, DE_C_INTER_MODE[self.interpolation])) - img = resize(cde.Tensor(np.asarray(img))) - return img.as_array() - - def parse(self): - return cde.ResizeOperation(self.size, DE_C_INTER_MODE[self.interpolation]) - - -class ResizeWithBBox(ImageTensorOperation): - """ - Resize the input image to the given size and adjust bounding boxes accordingly. - - Args: - size (Union[int, sequence]): The output size of the resized image. - If size is an integer, smaller edge of the image will be resized to this value with - the same image aspect ratio. - If size is a sequence of length 2, it should be (height, width). - interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR). 
- It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC]. - - - Inter.LINEAR, means interpolation method is bilinear interpolation. - - - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. - - - Inter.BICUBIC, means interpolation method is bicubic interpolation. - - Examples: - >>> from mindspore.dataset.vision import Inter - >>> decode_op = c_vision.Decode() - >>> bbox_op = c_vision.ResizeWithBBox(50, Inter.NEAREST) - >>> transforms_list = [decode_op, bbox_op] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_resize_interpolation - def __init__(self, size, interpolation=Inter.LINEAR): + size = (size, size) self.size = size + self.scale = scale + self.ratio = ratio self.interpolation = interpolation + self.max_attempts = max_attempts def parse(self): - size = self.size - if isinstance(size, int): - size = (size,) - return cde.ResizeWithBBoxOperation(size, DE_C_INTER_MODE[self.interpolation]) + return cde.RandomResizedCropOperation(self.size, self.scale, self.ratio, DE_C_INTER_MODE[self.interpolation], + self.max_attempts) class RandomResizedCropWithBBox(ImageTensorOperation): @@ -904,160 +1012,69 @@ class RandomResizedCropWithBBox(ImageTensorOperation): DE_C_INTER_MODE[self.interpolation], self.max_attempts) -class RandomResizedCrop(ImageTensorOperation): +class RandomResize(ImageTensorOperation): """ - Crop the input image to a random size and aspect ratio. + Tensor operation to resize the input image using a randomly selected interpolation mode. Args: - size (Union[int, sequence]): The size of the output image. - If size is an integer, a square crop of size (size, size) is returned. + size (Union[int, sequence]): The output size of the resized image. + If size is an integer, smaller edge of the image will be resized to this value with + the same image aspect ratio. If size is a sequence of length 2, it should be (height, width). - scale (tuple, optional): Range [min, max) of respective size of the original - size to be cropped (default=(0.08, 1.0)). - ratio (tuple, optional): Range [min, max) of aspect ratio to be cropped - (default=(3. / 4., 4. / 3.)). - interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). - It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. - - - Inter.BILINEAR, means interpolation method is bilinear interpolation. - - - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. - - - Inter.BICUBIC, means interpolation method is bicubic interpolation. - - max_attempts (int, optional): The maximum number of attempts to propose a valid - crop_area (default=10). If exceeded, fall back to use center_crop instead. Examples: - >>> from mindspore.dataset.vision import Inter - >>> decode_op = c_vision.Decode() - >>> resize_crop_op = c_vision.RandomResizedCrop(size=(50, 75), scale=(0.25, 0.5), - ... interpolation=Inter.BILINEAR) - >>> transforms_list = [decode_op, resize_crop_op] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + >>> # randomly resize image, keeping aspect ratio + >>> transforms_list1 = [c_vision.Decode(), c_vision.RandomResize(50)] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, ... 
input_columns=["image"]) + >>> # randomly resize image to landscape style + >>> transforms_list2 = [c_vision.Decode(), c_vision.RandomResize((40, 60))] + >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, + ... input_columns=["image"]) """ - @check_random_resize_crop - def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), - interpolation=Inter.BILINEAR, max_attempts=10): - if isinstance(size, int): - size = (size, size) + @check_resize + def __init__(self, size): self.size = size - self.scale = scale - self.ratio = ratio - self.interpolation = interpolation - self.max_attempts = max_attempts def parse(self): - return cde.RandomResizedCropOperation(self.size, self.scale, self.ratio, DE_C_INTER_MODE[self.interpolation], - self.max_attempts) + size = self.size + if isinstance(size, int): + size = (size,) + return cde.RandomResizeOperation(size) -class CenterCrop(ImageTensorOperation): +class RandomResizeWithBBox(ImageTensorOperation): """ - Crops the input image at the center to the given size. + Tensor operation to resize the input image using a randomly selected interpolation mode and adjust + bounding boxes accordingly. Args: - size (Union[int, sequence]): The output size of the cropped image. - If size is an integer, a square crop of size (size, size) is returned. + size (Union[int, sequence]): The output size of the resized image. + If size is an integer, smaller edge of the image will be resized to this value with + the same image aspect ratio. If size is a sequence of length 2, it should be (height, width). Examples: - >>> # crop image to a square - >>> transforms_list1 = [c_vision.Decode(), c_vision.CenterCrop(50)] + >>> # randomly resize image with bounding boxes, keeping aspect ratio + >>> transforms_list1 = [c_vision.Decode(), c_vision.RandomResizeWithBBox(60)] >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, ... input_columns=["image"]) - >>> # crop image to portrait style - >>> transforms_list2 = [c_vision.Decode(), c_vision.CenterCrop((60, 40))] + >>> # randomly resize image with bounding boxes to portrait style + >>> transforms_list2 = [c_vision.Decode(), c_vision.RandomResizeWithBBox((80, 60))] >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, ... input_columns=["image"]) """ - @check_crop + @check_resize def __init__(self, size): - if isinstance(size, int): - size = (size, size) self.size = size def parse(self): - return cde.CenterCropOperation(self.size) - - -class RandomColor(ImageTensorOperation): - """ - Adjust the color of the input image by a fixed or random degree. - This operation works only with 3-channel color images. - - Args: - degrees (sequence, optional): Range of random color adjustment degrees. - It should be in (min, max) format. If min=max, then it is a - single fixed magnitude operation (default=(0.1, 1.9)). - - Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.RandomColor((0.5, 2.0))] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_positive_degrees - def __init__(self, degrees=(0.1, 1.9)): - self.degrees = degrees - - def parse(self): - return cde.RandomColorOperation(*self.degrees) - - -class RandomColorAdjust(ImageTensorOperation): - """ - Randomly adjust the brightness, contrast, saturation, and hue of the input image. - - Args: - brightness (Union[float, tuple], optional): Brightness adjustment factor (default=(1, 1)). Cannot be negative. 
- If it is a float, the factor is uniformly chosen from the range [max(0, 1-brightness), 1+brightness]. - If it is a sequence, it should be [min, max] for the range. - contrast (Union[float, tuple], optional): Contrast adjustment factor (default=(1, 1)). Cannot be negative. - If it is a float, the factor is uniformly chosen from the range [max(0, 1-contrast), 1+contrast]. - If it is a sequence, it should be [min, max] for the range. - saturation (Union[float, tuple], optional): Saturation adjustment factor (default=(1, 1)). Cannot be negative. - If it is a float, the factor is uniformly chosen from the range [max(0, 1-saturation), 1+saturation]. - If it is a sequence, it should be [min, max] for the range. - hue (Union[float, tuple], optional): Hue adjustment factor (default=(0, 0)). - If it is a float, the range will be [-hue, hue]. Value should be 0 <= hue <= 0.5. - If it is a sequence, it should be [min, max] where -0.5 <= min <= max <= 0.5. - - Examples: - >>> decode_op = c_vision.Decode() - >>> transform_op = c_vision.RandomColorAdjust(brightness=(0.5, 1), - ... contrast=(0.4, 1), - ... saturation=(0.3, 1)) - >>> transforms_list = [decode_op, transform_op] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_random_color_adjust - def __init__(self, brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)): - brightness = self.expand_values(brightness) - contrast = self.expand_values(contrast) - saturation = self.expand_values(saturation) - hue = self.expand_values(hue, center=0, bound=(-0.5, 0.5), non_negative=False) - - self.brightness = brightness - self.contrast = contrast - self.saturation = saturation - self.hue = hue - - def expand_values(self, value, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True): - if isinstance(value, numbers.Number): - value = [center - value, center + value] - if non_negative: - value[0] = max(0, value[0]) - check_range(value, bound) - return (value[0], value[1]) - - def parse(self): - return cde.RandomColorAdjustOperation(self.brightness, self.contrast, self.saturation, self.hue) + size = self.size + if isinstance(size, int): + size = (size,) + return cde.RandomResizeWithBBoxOperation(size) class RandomRotation(ImageTensorOperation): @@ -1116,374 +1133,238 @@ class RandomRotation(ImageTensorOperation): return cde.RandomRotationOperation(degrees, interpolation, expand, center, fill_value) -class Rescale(ImageTensorOperation): +class RandomSelectSubpolicy(ImageTensorOperation): """ - Tensor operation to rescale the input image. + Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples + (op, prob), where op is a TensorOp operation and prob is the probability that this op will be applied. Once + a sub-policy is selected, each op within the subpolicy with be applied in sequence according to its probability. Args: - rescale (float): Rescale factor. - shift (float): Shift factor. + policy (list(list(tuple(TensorOp, float))): List of sub-policies to choose from. Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.Rescale(1.0 / 255.0, -1.0)] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) + >>> policy = [[(c_vision.RandomRotation((45, 45)), 0.5), + ... (c_vision.RandomVerticalFlip(), 1), + ... (c_vision.RandomColorAdjust(), 0.8)], + ... [(c_vision.RandomRotation((90, 90)), 1), + ... 
(c_vision.RandomColorAdjust(), 0.2)]] + >>> image_folder_dataset_1 = image_folder_dataset.map(operations=c_vision.RandomSelectSubpolicy(policy), + ... input_columns=["image"]) """ - @check_rescale - def __init__(self, rescale, shift): - self.rescale = rescale - self.shift = shift + @check_random_select_subpolicy_op + def __init__(self, policy): + self.policy = policy - def __call__(self, img): + def parse(self): """ - Call method. - - Args: - img (NumPy or PIL image): Image to be rescaled. - - Returns: - img (NumPy), Rescaled image. + Return a C++ representation of the operator for execution """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - rescale = cde.Execute(cde.RescaleOperation(self.rescale, self.shift)) - img = rescale(cde.Tensor(np.asarray(img))) - return img.as_array() - - def parse(self): - return cde.RescaleOperation(self.rescale, self.shift) + policy = [] + for list_one in self.policy: + policy_one = [] + for list_two in list_one: + if list_two[0] and getattr(list_two[0], 'parse', None): + policy_one.append((list_two[0].parse(), list_two[1])) + else: + policy_one.append((list_two[0], list_two[1])) + policy.append(policy_one) + return cde.RandomSelectSubpolicyOperation(policy) -class RandomResize(ImageTensorOperation): +class RandomSharpness(ImageTensorOperation): """ - Tensor operation to resize the input image using a randomly selected interpolation mode. + Adjust the sharpness of the input image by a fixed or random degree. Degree of 0.0 gives a blurred image, + degree of 1.0 gives the original image, and degree of 2.0 gives a sharpened image. Args: - size (Union[int, sequence]): The output size of the resized image. - If size is an integer, smaller edge of the image will be resized to this value with - the same image aspect ratio. - If size is a sequence of length 2, it should be (height, width). + degrees (tuple, optional): Range of random sharpness adjustment degrees. It should be in (min, max) format. + If min=max, then it is a single fixed magnitude operation (default = (0.1, 1.9)). + + Raises: + TypeError : If degrees is not a list or tuple. + ValueError: If degrees is negative. + ValueError: If degrees is in (max, min) format instead of (min, max). Examples: - >>> # randomly resize image, keeping aspect ratio - >>> transforms_list1 = [c_vision.Decode(), c_vision.RandomResize(50)] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, + >>> transforms_list = [c_vision.Decode(), c_vision.RandomSharpness(degrees=(0.2, 1.9))] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, ... input_columns=["image"]) - >>> # randomly resize image to landscape style - >>> transforms_list2 = [c_vision.Decode(), c_vision.RandomResize((40, 60))] - >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, - ... input_columns=["image"]) """ - @check_resize - def __init__(self, size): - self.size = size + @check_positive_degrees + def __init__(self, degrees=(0.1, 1.9)): + self.degrees = degrees def parse(self): - size = self.size - if isinstance(size, int): - size = (size,) - return cde.RandomResizeOperation(size) + return cde.RandomSharpnessOperation(self.degrees) -class RandomResizeWithBBox(ImageTensorOperation): +class RandomSolarize(ImageTensorOperation): """ - Tensor operation to resize the input image using a randomly selected interpolation mode and adjust - bounding boxes accordingly. 
+ Invert all pixel values above a threshold. Args: - size (Union[int, sequence]): The output size of the resized image. - If size is an integer, smaller edge of the image will be resized to this value with - the same image aspect ratio. - If size is a sequence of length 2, it should be (height, width). - - Examples: - >>> # randomly resize image with bounding boxes, keeping aspect ratio - >>> transforms_list1 = [c_vision.Decode(), c_vision.RandomResizeWithBBox(60)] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, - ... input_columns=["image"]) - >>> # randomly resize image with bounding boxes to portrait style - >>> transforms_list2 = [c_vision.Decode(), c_vision.RandomResizeWithBBox((80, 60))] - >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, - ... input_columns=["image"]) - """ - - @check_resize - def __init__(self, size): - self.size = size - - def parse(self): - size = self.size - if isinstance(size, int): - size = (size,) - return cde.RandomResizeWithBBoxOperation(size) - - -class HWC2CHW(ImageTensorOperation): - """ - Transpose the input image; shape (H, W, C) to shape (C, H, W). + threshold (tuple, optional): Range of random solarize threshold. Threshold values should always be + in the range (0, 255), include at least one integer value in the given range and + be in (min, max) format. If min=max, then it is a single fixed magnitude operation (default=(0, 255)). Examples: - >>> transforms_list = [c_vision.Decode(), - ... c_vision.RandomHorizontalFlip(0.75), - ... c_vision.RandomCrop(512), - ... c_vision.HWC2CHW()] + >>> transforms_list = [c_vision.Decode(), c_vision.RandomSolarize(threshold=(10,100))] >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, ... input_columns=["image"]) """ - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy or PIL image): Image array, of shape (H, W, C), to have channels swapped. - - Returns: - img (NumPy), Image array, of shape (C, H, W), with channels swapped. - """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - hwc2chw = cde.Execute(cde.HwcToChwOperation()) - img = hwc2chw(cde.Tensor(np.asarray(img))) - return img.as_array() + @check_random_solarize + def __init__(self, threshold=(0, 255)): + self.threshold = threshold def parse(self): - return cde.HwcToChwOperation() + return cde.RandomSolarizeOperation(self.threshold) -class RandomCropDecodeResize(ImageTensorOperation): +class RandomVerticalFlip(ImageTensorOperation): """ - Equivalent to RandomResizedCrop, but crops before decodes. + Flip the input image vertically, randomly with a given probability. Args: - size (Union[int, sequence]): The size of the output image. - If size is an integer, a square crop of size (size, size) is returned. - If size is a sequence of length 2, it should be (height, width). - scale (tuple, optional): Range [min, max) of respective size of the - original size to be cropped (default=(0.08, 1.0)). - ratio (tuple, optional): Range [min, max) of aspect ratio to be - cropped (default=(3. / 4., 4. / 3.)). - interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). - It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. - - - Inter.BILINEAR, means interpolation method is bilinear interpolation. - - - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. 
- - - Inter.BICUBIC, means interpolation method is bicubic interpolation. - - max_attempts (int, optional): The maximum number of attempts to propose a valid crop_area (default=10). - If exceeded, fall back to use center_crop instead. + prob (float, optional): Probability of the image being flipped (default=0.5). Examples: - >>> from mindspore.dataset.vision import Inter - >>> resize_crop_decode_op = c_vision.RandomCropDecodeResize(size=(50, 75), - ... scale=(0.25, 0.5), - ... interpolation=Inter.NEAREST, - ... max_attempts=5) - >>> transforms_list = [resize_crop_decode_op] + >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlip(0.25)] >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, ... input_columns=["image"]) """ - @check_random_resize_crop - def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), - interpolation=Inter.BILINEAR, max_attempts=10): - if isinstance(size, int): - size = (size, size) - self.size = size - self.scale = scale - self.ratio = ratio - self.interpolation = interpolation - self.max_attempts = max_attempts - - def parse(self): - return cde.RandomCropDecodeResizeOperation(self.size, self.scale, self.ratio, - DE_C_INTER_MODE[self.interpolation], - self.max_attempts) - - -class Pad(ImageTensorOperation): - """ - Pads the image according to padding parameters. - - Args: - padding (Union[int, sequence]): The number of pixels to pad the image. - If a single number is provided, it pads all borders with this value. - If a tuple or list of 2 values are provided, it pads the (left and top) - with the first value and (right and bottom) with the second value. - If 4 values are provided as a list or tuple, - it pads the left, top, right and bottom respectively. - fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for - padding_mode Border.CONSTANT (default=0). - If it is an integer, it is used for all RGB channels. - If it is a 3-tuple, it is used to fill R, G, B channels respectively. - The fill_value values must be in range [0, 255]. - padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of - [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. + @check_prob + def __init__(self, prob=0.5): + self.prob = prob - - Border.CONSTANT, means it fills the border with constant values. + def parse(self): + return cde.RandomVerticalFlipOperation(self.prob) - - Border.EDGE, means it pads with the last value on the edge. - - Border.REFLECT, means it reflects the values on the edge omitting the last - value of edge. +class RandomVerticalFlipWithBBox(ImageTensorOperation): + """ + Flip the input image vertically, randomly with a given probability and adjust bounding boxes accordingly. - - Border.SYMMETRIC, means it reflects the values on the edge repeating the last - value of edge. + Args: + prob (float, optional): Probability of the image being flipped (default=0.5). Examples: - >>> from mindspore.dataset.vision import Border - >>> transforms_list = [c_vision.Decode(), c_vision.Pad([100, 100, 100, 100])] + >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlipWithBBox(0.20)] >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, ... 
input_columns=["image"]) """ - @check_pad - def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT): - padding = parse_padding(padding) - if isinstance(fill_value, int): - fill_value = tuple([fill_value] * 3) - self.padding = padding - self.fill_value = fill_value - self.padding_mode = padding_mode + @check_prob + def __init__(self, prob=0.5): + self.prob = prob def parse(self): - return cde.PadOperation(self.padding, self.fill_value, DE_C_BORDER_TYPE[self.padding_mode]) - - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy or PIL image): Image to be padded. - - Returns: - img (NumPy), Padded image. - """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - pad = cde.Execute(cde.PadOperation(self.padding, self.fill_value, DE_C_BORDER_TYPE[self.padding_mode])) - img = pad(cde.Tensor(np.asarray(img))) - return img.as_array() + return cde.RandomVerticalFlipWithBBoxOperation(self.prob) -class UniformAugment(ImageTensorOperation): +class Rescale(ImageTensorOperation): """ - Tensor operation to perform randomly selected augmentation. + Tensor operation to rescale the input image. Args: - transforms: List of C++ operations (Python operations are not accepted). - num_ops (int, optional): Number of operations to be selected and applied (default=2). + rescale (float): Rescale factor. + shift (float): Shift factor. Examples: - >>> import mindspore.dataset.vision.py_transforms as py_vision - >>> transforms_list = [c_vision.RandomHorizontalFlip(), - ... c_vision.RandomVerticalFlip(), - ... c_vision.RandomColorAdjust(), - ... c_vision.RandomRotation(degrees=45)] - >>> uni_aug_op = c_vision.UniformAugment(transforms=transforms_list, num_ops=2) - >>> transforms_all = [c_vision.Decode(), c_vision.Resize(size=[224, 224]), - ... uni_aug_op, py_vision.ToTensor()] - >>> image_folder_dataset_1 = image_folder_dataset.map(operations=transforms_all, - ... input_columns="image", - ... num_parallel_workers=1) + >>> transforms_list = [c_vision.Decode(), c_vision.Rescale(1.0 / 255.0, -1.0)] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) """ - @check_uniform_augment_cpp - def __init__(self, transforms, num_ops=2): - self.transforms = transforms - self.num_ops = num_ops + @check_rescale + def __init__(self, rescale, shift): + self.rescale = rescale + self.shift = shift def parse(self): - transforms = [] - for op in self.transforms: - if op and getattr(op, 'parse', None): - transforms.append(op.parse()) - else: - transforms.append(op) - return cde.UniformAugOperation(transforms, self.num_ops) + return cde.RescaleOperation(self.rescale, self.shift) -class RandomSelectSubpolicy(ImageTensorOperation): +class Resize(ImageTensorOperation): """ - Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples - (op, prob), where op is a TensorOp operation and prob is the probability that this op will be applied. Once - a sub-policy is selected, each op within the subpolicy with be applied in sequence according to its probability. + Resize the input image to the given size. Args: - policy (list(list(tuple(TensorOp, float))): List of sub-policies to choose from. + size (Union[int, sequence]): The output size of the resized image. + If size is an integer, the smaller edge of the image will be resized to this value with + the same image aspect ratio. 
+ If size is a sequence of length 2, it should be (height, width). + interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR). + It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC]. + + - Inter.LINEAR, means interpolation method is bilinear interpolation. + + - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. + + - Inter.BICUBIC, means interpolation method is bicubic interpolation. + + - Inter.AREA, means interpolation method is pixel area interpolation. Examples: - >>> policy = [[(c_vision.RandomRotation((45, 45)), 0.5), - ... (c_vision.RandomVerticalFlip(), 1), - ... (c_vision.RandomColorAdjust(), 0.8)], - ... [(c_vision.RandomRotation((90, 90)), 1), - ... (c_vision.RandomColorAdjust(), 0.2)]] - >>> image_folder_dataset_1 = image_folder_dataset.map(operations=c_vision.RandomSelectSubpolicy(policy), - ... input_columns=["image"]) + >>> from mindspore.dataset.vision import Inter + >>> decode_op = c_vision.Decode() + >>> resize_op = c_vision.Resize([100, 75], Inter.BICUBIC) + >>> transforms_list = [decode_op, resize_op] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) """ - @check_random_select_subpolicy_op - def __init__(self, policy): - self.policy = policy + @check_resize_interpolation + def __init__(self, size, interpolation=Inter.LINEAR): + if isinstance(size, int): + size = (size,) + self.size = size + self.interpolation = interpolation def parse(self): - """ - Return a C++ representation of the operator for execution - """ - policy = [] - for list_one in self.policy: - policy_one = [] - for list_two in list_one: - if list_two[0] and getattr(list_two[0], 'parse', None): - policy_one.append((list_two[0].parse(), list_two[1])) - else: - policy_one.append((list_two[0], list_two[1])) - policy.append(policy_one) - return cde.RandomSelectSubpolicyOperation(policy) + return cde.ResizeOperation(self.size, DE_C_INTER_MODE[self.interpolation]) -class SoftDvppDecodeResizeJpeg(ImageTensorOperation): +class ResizeWithBBox(ImageTensorOperation): """ - Tensor operation to decode and resize JPEG image using the simulation algorithm of - Ascend series chip DVPP module. - - It is recommended to use this algorithm in the following scenarios: - When training, the DVPP of the Ascend chip is not used, - and the DVPP of the Ascend chip is used during inference, - and the accuracy of inference is lower than the accuracy of training; - and the input image size should be in range [32*32, 8192*8192]. - The zoom-out and zoom-in multiples of the image length and width should in the range [1/32, 16]. - Only images with an even resolution can be output. The output of odd resolution is not supported. + Resize the input image to the given size and adjust bounding boxes accordingly. Args: size (Union[int, sequence]): The output size of the resized image. If size is an integer, smaller edge of the image will be resized to this value with the same image aspect ratio. If size is a sequence of length 2, it should be (height, width). + interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR). + It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC]. + + - Inter.LINEAR, means interpolation method is bilinear interpolation. + + - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. + + - Inter.BICUBIC, means interpolation method is bicubic interpolation. 
Examples: - >>> # decode and resize image, keeping aspect ratio - >>> transforms_list1 = [c_vision.Decode(), c_vision.SoftDvppDecodeResizeJpeg(70)] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, + >>> from mindspore.dataset.vision import Inter + >>> decode_op = c_vision.Decode() + >>> bbox_op = c_vision.ResizeWithBBox(50, Inter.NEAREST) + >>> transforms_list = [decode_op, bbox_op] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, ... input_columns=["image"]) - >>> # decode and resize to portrait style - >>> transforms_list2 = [c_vision.Decode(), c_vision.SoftDvppDecodeResizeJpeg((80, 60))] - >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, - ... input_columns=["image"]) """ - @check_resize - def __init__(self, size): - if isinstance(size, int): - size = (size,) + @check_resize_interpolation + def __init__(self, size, interpolation=Inter.LINEAR): self.size = size + self.interpolation = interpolation def parse(self): - return cde.SoftDvppDecodeResizeJpegOperation(self.size) + size = self.size + if isinstance(size, int): + size = (size,) + return cde.ResizeWithBBoxOperation(size, DE_C_INTER_MODE[self.interpolation]) class SoftDvppDecodeRandomCropResizeJpeg(ImageTensorOperation): @@ -1531,24 +1412,78 @@ class SoftDvppDecodeRandomCropResizeJpeg(ImageTensorOperation): return cde.SoftDvppDecodeRandomCropResizeJpegOperation(self.size, self.scale, self.ratio, self.max_attempts) -class RandomSolarize(ImageTensorOperation): +class SoftDvppDecodeResizeJpeg(ImageTensorOperation): """ - Invert all pixel values above a threshold. + Tensor operation to decode and resize JPEG image using the simulation algorithm of + Ascend series chip DVPP module. + + It is recommended to use this algorithm in the following scenarios: + When training, the DVPP of the Ascend chip is not used, + and the DVPP of the Ascend chip is used during inference, + and the accuracy of inference is lower than the accuracy of training; + and the input image size should be in range [32*32, 8192*8192]. + The zoom-out and zoom-in multiples of the image length and width should in the range [1/32, 16]. + Only images with an even resolution can be output. The output of odd resolution is not supported. Args: - threshold (tuple, optional): Range of random solarize threshold. Threshold values should always be - in the range (0, 255), include at least one integer value in the given range and - be in (min, max) format. If min=max, then it is a single fixed magnitude operation (default=(0, 255)). + size (Union[int, sequence]): The output size of the resized image. + If size is an integer, smaller edge of the image will be resized to this value with + the same image aspect ratio. + If size is a sequence of length 2, it should be (height, width). Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.RandomSolarize(threshold=(10,100))] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + >>> # decode and resize image, keeping aspect ratio + >>> transforms_list1 = [c_vision.Decode(), c_vision.SoftDvppDecodeResizeJpeg(70)] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, ... input_columns=["image"]) + >>> # decode and resize to portrait style + >>> transforms_list2 = [c_vision.Decode(), c_vision.SoftDvppDecodeResizeJpeg((80, 60))] + >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, + ... 
input_columns=["image"]) """ - @check_random_solarize - def __init__(self, threshold=(0, 255)): - self.threshold = threshold + @check_resize + def __init__(self, size): + if isinstance(size, int): + size = (size,) + self.size = size def parse(self): - return cde.RandomSolarizeOperation(self.threshold) + return cde.SoftDvppDecodeResizeJpegOperation(self.size) + + +class UniformAugment(ImageTensorOperation): + """ + Tensor operation to perform randomly selected augmentation. + + Args: + transforms: List of C++ operations (Python operations are not accepted). + num_ops (int, optional): Number of operations to be selected and applied (default=2). + + Examples: + >>> import mindspore.dataset.vision.py_transforms as py_vision + >>> transforms_list = [c_vision.RandomHorizontalFlip(), + ... c_vision.RandomVerticalFlip(), + ... c_vision.RandomColorAdjust(), + ... c_vision.RandomRotation(degrees=45)] + >>> uni_aug_op = c_vision.UniformAugment(transforms=transforms_list, num_ops=2) + >>> transforms_all = [c_vision.Decode(), c_vision.Resize(size=[224, 224]), + ... uni_aug_op, py_vision.ToTensor()] + >>> image_folder_dataset_1 = image_folder_dataset.map(operations=transforms_all, + ... input_columns="image", + ... num_parallel_workers=1) + """ + + @check_uniform_augment_cpp + def __init__(self, transforms, num_ops=2): + self.transforms = transforms + self.num_ops = num_ops + + def parse(self): + transforms = [] + for op in self.transforms: + if op and getattr(op, 'parse', None): + transforms.append(op.parse()) + else: + transforms.append(op) + return cde.UniformAugOperation(transforms, self.num_ops) diff --git a/tests/ut/python/dataset/test_HWC2CHW.py b/tests/ut/python/dataset/test_HWC2CHW.py index ac5936ad0e..612dc34e27 100644 --- a/tests/ut/python/dataset/test_HWC2CHW.py +++ b/tests/ut/python/dataset/test_HWC2CHW.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,6 +29,20 @@ DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" +def test_HWC2CHW_callable(): + """ + Test HWC2CHW is callable + """ + logger.info("Test HWC2CHW callable") + img = np.fromfile("../data/dataset/apple.jpg", dtype=np.uint8) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + + img = c_vision.Decode()(img) + img = c_vision.HWC2CHW()(img) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + assert img.shape == (3, 2268, 4032) + + def test_HWC2CHW(plot=False): """ Test HWC2CHW @@ -122,6 +136,7 @@ def test_HWC2CHW_comp(plot=False): if __name__ == '__main__': + test_HWC2CHW_callable() test_HWC2CHW(True) test_HWC2CHW_md5() test_HWC2CHW_comp(True) diff --git a/tests/ut/python/dataset/test_compose.py b/tests/ut/python/dataset/test_compose.py index db86b1cbeb..10bcfe189d 100644 --- a/tests/ut/python/dataset/test_compose.py +++ b/tests/ut/python/dataset/test_compose.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -219,7 +219,7 @@ def test_c_py_compose_vision_module(plot=False, run_golden=True): def test_py_transforms_with_c_vision(): """ - These examples will fail, as py_transforms.Random(Apply/Choice/Order) expect callable functions + These examples will fail, as c_transform should not be used in py_transforms.Random(Apply/Choice/Order) """ ds.config.set_seed(0) @@ -236,15 +236,15 @@ def test_py_transforms_with_c_vision(): with pytest.raises(ValueError) as error_info: test_config(py_transforms.RandomApply([c_vision.RandomResizedCrop(200)])) - assert "transforms[0] is not callable." in str(error_info.value) + assert "transforms[0] is not a py transforms." in str(error_info.value) with pytest.raises(ValueError) as error_info: test_config(py_transforms.RandomChoice([c_vision.RandomResizedCrop(200)])) - assert "transforms[0] is not callable." in str(error_info.value) + assert "transforms[0] is not a py transforms." in str(error_info.value) with pytest.raises(ValueError) as error_info: test_config(py_transforms.RandomOrder([np.array, c_vision.RandomResizedCrop(200)])) - assert "transforms[1] is not callable." in str(error_info.value) + assert "transforms[1] is not a py transforms." in str(error_info.value) with pytest.raises(RuntimeError) as error_info: test_config([py_transforms.OneHotOp(20, 0.1)]) diff --git a/tests/ut/python/dataset/test_invert.py b/tests/ut/python/dataset/test_invert.py index 07a4d5bc6f..3387427bc7 100644 --- a/tests/ut/python/dataset/test_invert.py +++ b/tests/ut/python/dataset/test_invert.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,6 +29,21 @@ DATA_DIR = "../data/dataset/testImageNetData/train/" GENERATE_GOLDEN = False +def test_invert_callable(): + """ + Test Invert is callable + """ + logger.info("Test Invert callable") + img = np.fromfile("../data/dataset/apple.jpg", dtype=np.uint8) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + + img = C.Decode()(img) + img = C.Invert()(img) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + + assert img.shape == (2268, 4032, 3) + + def test_invert_py(plot=False): """ Test Invert python op @@ -247,6 +262,7 @@ def test_invert_md5_c(): if __name__ == "__main__": + test_invert_callable() test_invert_py(plot=False) test_invert_c(plot=False) test_invert_py_c(plot=False) diff --git a/tests/ut/python/dataset/test_random_crop_and_resize.py b/tests/ut/python/dataset/test_random_crop_and_resize.py index b4799a71c8..1fde552361 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -34,6 +34,22 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" GENERATE_GOLDEN = False +def test_random_crop_and_resize_callable(): + """ + Test RandomCropAndResize op is callable + """ + logger.info("test_random_crop_and_resize_callable") + img = np.fromfile("../data/dataset/apple.jpg", dtype=np.uint8) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + + decode_op = c_vision.Decode() + img = decode_op(img) + + random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (2, 2), (1, 3)) + img = random_crop_and_resize_op(img) + assert np.shape(img) == (256, 512, 3) + + def test_random_crop_and_resize_op_c(plot=False): """ Test RandomCropAndResize op in c transforms @@ -389,6 +405,7 @@ def test_random_crop_and_resize_06(): if __name__ == "__main__": + test_random_crop_and_resize_callable() test_random_crop_and_resize_op_c(True) test_random_crop_and_resize_op_py(True) test_random_crop_and_resize_op_py_ANTIALIAS() diff --git a/tests/ut/python/dataset/test_text_jieba_tokenizer.py b/tests/ut/python/dataset/test_text_jieba_tokenizer.py index 21a9c611be..f13128316e 100644 --- a/tests/ut/python/dataset/test_text_jieba_tokenizer.py +++ b/tests/ut/python/dataset/test_text_jieba_tokenizer.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ import numpy as np import mindspore.dataset as ds from mindspore.dataset.text import JiebaTokenizer from mindspore.dataset.text import JiebaMode, to_str +from mindspore import log as logger DATA_FILE = "../data/dataset/testJiebaDataset/3.txt" DATA_ALL_FILE = "../data/dataset/testJiebaDataset/*" @@ -24,6 +25,23 @@ HMM_FILE = "../data/dataset/jiebadict/hmm_model.utf8" MP_FILE = "../data/dataset/jiebadict/jieba.dict.utf8" +def test_jieba_callable(): + """ + Test jieba tokenizer op is callable + """ + logger.info("test_jieba_callable") + jieba_op1 = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op2 = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM) + + text1 = "今天天气太好了我们一起去外面玩吧" + text2 = "男默女泪市长江大桥" + assert np.array_equal(jieba_op1(text1), ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧']) + assert np.array_equal(jieba_op2(text1), ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧']) + + jieba_op1.add_word("男默女泪") + assert np.array_equal(jieba_op1(text2), ['男默女泪', '市', '长江大桥']) + + def test_jieba_1(): """Test jieba tokenizer with MP mode""" data = ds.TextFileDataset(DATA_FILE) @@ -457,6 +475,7 @@ def test_jieba_6(): if __name__ == "__main__": + test_jieba_callable() test_jieba_1() test_jieba_1_1() test_jieba_1_2() diff --git a/tests/ut/python/dataset/test_uniform_augment.py b/tests/ut/python/dataset/test_uniform_augment.py index c8548cf82c..1adcc4a991 100644 --- a/tests/ut/python/dataset/test_uniform_augment.py +++ b/tests/ut/python/dataset/test_uniform_augment.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -28,6 +28,24 @@ from util import visualize_list, diff_mse DATA_DIR = "../data/dataset/testImageNetData/train/" +def test_uniform_augment_callable(num_ops=2): + """ + Test UniformAugment is callable + """ + logger.info("test_uniform_augment_callable") + img = np.fromfile("../data/dataset/apple.jpg", dtype=np.uint8) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + + decode_op = C.Decode() + img = decode_op(img) + + transforms_ua = [C.RandomCrop(size=[400, 400], padding=[32, 32, 32, 32]), + C.RandomCrop(size=[400, 400], padding=[32, 32, 32, 32])] + uni_aug = C.UniformAugment(transforms=transforms_ua, num_ops=num_ops) + img = uni_aug([img, img]) + assert ((np.shape(img) == (2, 2268, 4032, 3)) or (np.shape(img) == (1, 400, 400, 3))) + + def test_uniform_augment(plot=False, num_ops=2): """ Test UniformAugment @@ -262,6 +280,7 @@ def test_cpp_uniform_augment_random_crop_badinput(num_ops=1): if __name__ == "__main__": + test_uniform_augment_callable(num_ops=2) test_uniform_augment(num_ops=1, plot=True) test_cpp_uniform_augment(num_ops=1, plot=True) test_cpp_uniform_augment_exception_pyops(num_ops=1) diff --git a/tests/ut/python/dataset/test_vocab.py b/tests/ut/python/dataset/test_vocab.py index f9032b3e54..a6818ac2e7 100644 --- a/tests/ut/python/dataset/test_vocab.py +++ b/tests/ut/python/dataset/test_vocab.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ import numpy as np import mindspore.dataset as ds import mindspore.dataset.text as text import mindspore.common.dtype as mstype +from mindspore import log as logger # this file contains "home is behind the world head" each word is 1 line DATA_FILE = "../data/dataset/testVocab/words.txt" @@ -25,6 +26,16 @@ VOCAB_FILE = "../data/dataset/testVocab/vocab_list.txt" SIMPLE_VOCAB_FILE = "../data/dataset/testVocab/simple_vocab_list.txt" +def test_lookup_callable(): + """ + Test lookup is callable + """ + logger.info("test_lookup_callable") + vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您']) + lookup = text.Lookup(vocab) + word = "迎" + assert lookup(word) == 3 + def test_from_list_tutorial(): vocab = text.Vocab.from_list("home IS behind the world ahead !".split(" "), ["", ""], True) lookup = text.Lookup(vocab, "") @@ -171,6 +182,7 @@ def test_lookup_cast_type(): if __name__ == '__main__': + test_lookup_callable() test_from_dict_exception() test_from_list_tutorial() test_from_file_tutorial()
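
Usage note (not part of the patch): the new tests above all go through the decoupled eager-mode
__call__ paths added in execute.cc and execute_binding.cc, which let image and text ops run on raw
data without building a dataset pipeline. Below is a minimal sketch of that usage, assuming a
MindSpore build that contains this patch and the repo's test image at ../data/dataset/apple.jpg;
the shapes in the comments come from the asserts in the tests above, everything else is
illustrative only.

    import numpy as np
    import mindspore.dataset.text as text
    import mindspore.dataset.vision.c_transforms as c_vision

    # Vision ops are directly callable on raw numpy data, one op at a time,
    # as in test_HWC2CHW_callable and test_random_crop_and_resize_callable.
    raw = np.fromfile("../data/dataset/apple.jpg", dtype=np.uint8)
    img = c_vision.Decode()(raw)                          # HWC uint8, (2268, 4032, 3)
    crop = c_vision.RandomResizedCrop((256, 512))(img)    # (256, 512, 3)
    chw = c_vision.HWC2CHW()(crop)                        # (3, 256, 512)

    # A list of inputs goes through the new Execute overload and returns one
    # output per input tensor, as in test_uniform_augment_callable.
    uni_aug = c_vision.UniformAugment(
        transforms=[c_vision.RandomCrop(size=[400, 400], padding=[32, 32, 32, 32]),
                    c_vision.RandomCrop(size=[400, 400], padding=[32, 32, 32, 32])],
        num_ops=2)
    outputs = uni_aug([img, img])

    # Text ops follow the same pattern, as in test_lookup_callable.
    vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您'])
    assert text.Lookup(vocab)('迎') == 3

The exact output of the UniformAugment call depends on which ops are randomly selected, which is
why test_uniform_augment_callable accepts more than one result shape.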