add note for ops not supported on windows

5 years ago · ca1b692640
parent a95cbdb121
commit ca1b692640
2 changed files with 17 additions and 12 deletions
--- a/mindspore/dataset/__init__.py
+++ b/mindspore/dataset/__init__.py
@@ -12,10 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-This module provides APIs to load and process various datasets: MNIST,
-CIFAR-10, CIFAR-100, VOC, ImageNet, CelebA dataset, etc. It also supports
-datasets in special format, including mindrecord, tfrecord, manifest. Users
-can also create samplers with this module to sample data.
+This module provides APIs to load and process various common datasets such as MNIST,
+CIFAR-10, CIFAR-100, VOC, ImageNet, and CelebA. It also supports datasets in standard
+formats, including MindRecord, TFRecord, and Manifest. Users can also define their own
+datasets with this module.
+
+In addition, this module provides APIs to sample data while loading.
+
+Please note that cache is not supported on Windows platform yet. Please do not use it
+while loading and processing data on Windows.
 """

 from .core import config
--- a/mindspore/dataset/text/transforms.py
+++ b/mindspore/dataset/text/transforms.py
@@ -418,7 +418,7 @@ if platform.system().lower() != 'windows':
        Tokenize a scalar tensor of UTF-8 string on ICU4C defined whitespaces, such as: ' ', '\\\\t', '\\\\r', '\\\\n'.

        Note:
-            The WhitespaceTokenizer is not supported on windows platform yet.
+            WhitespaceTokenizer is not supported on Windows platform yet.

        Args:
            with_offsets (bool, optional): If or not output offsets of tokens (default=False).
@@ -449,7 +449,7 @@ if platform.system().lower() != 'windows':
        Tokenize a scalar tensor of UTF-8 string on Unicode script boundaries.

        Note:
-            The UnicodeScriptTokenizer is not supported on windows platform yet.
+            UnicodeScriptTokenizer is not supported on Windows platform yet.

        Args:
            keep_whitespace (bool, optional): If or not emit whitespace tokens (default=False).
@@ -482,7 +482,7 @@ if platform.system().lower() != 'windows':
        Apply case fold operation on utf-8 string tensor.

        Note:
-            The CaseFold is not supported on windows platform yet.
+            CaseFold is not supported on Windows platform yet.

        Examples:
            >>> import mindspore.dataset.text as text
@@ -505,7 +505,7 @@ if platform.system().lower() != 'windows':
        Apply normalize operation on utf-8 string tensor.

        Note:
-            The NormalizeUTF8 is not supported on windows platform yet.
+            NormalizeUTF8 is not supported on Windows platform yet.

        Args:
            normalize_form (NormalizeForm, optional): Valid values can be any of [NormalizeForm.NONE,
@@ -541,7 +541,7 @@ if platform.system().lower() != 'windows':
        See http://userguide.icu-project.org/strings/regexp for support regex pattern.

        Note:
-            The RegexReplace is not supported on windows platform yet.
+            RegexReplace is not supported on Windows platform yet.

        Args:
            pattern (str): the regex expression patterns.
@@ -572,7 +572,7 @@ if platform.system().lower() != 'windows':
        See http://userguide.icu-project.org/strings/regexp for support regex pattern.

        Note:
-            The RegexTokenizer is not supported on windows platform yet.
+            RegexTokenizer is not supported on Windows platform yet.

        Args:
            delim_pattern (str): The pattern of regex delimiters.
@@ -610,7 +610,7 @@ if platform.system().lower() != 'windows':
        Tokenize a scalar tensor of UTF-8 string by specific rules.

        Note:
-            The BasicTokenizer is not supported on windows platform yet.
+            BasicTokenizer is not supported on Windows platform yet.

        Args:
            lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation
@@ -666,7 +666,7 @@ if platform.system().lower() != 'windows':
        Tokenizer used for Bert text process.

        Note:
-            The BertTokenizer is not supported on windows platform yet.
+            BertTokenizer is not supported on Windows platform yet.

        Args:
            vocab (Vocab): A vocabulary object.