|
|
|
@ -19,7 +19,7 @@ utils provides some general methods for NLP text processing.
|
|
|
|
|
"""
|
|
|
|
|
import platform
|
|
|
|
|
from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, TruncateSequencePair, \
|
|
|
|
|
ToNumber, SlidingWindow, SentencePieceTokenizer
|
|
|
|
|
ToNumber, SlidingWindow, SentencePieceTokenizer, PythonTokenizer
|
|
|
|
|
from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \
|
|
|
|
|
SPieceTokenizerOutType, SPieceTokenizerLoadType
|
|
|
|
|
|
|
|
|
@ -33,7 +33,7 @@ __all__ = [
|
|
|
|
|
|
|
|
|
|
if platform.system().lower() != 'windows':
|
|
|
|
|
from .transforms import UnicodeScriptTokenizer, WhitespaceTokenizer, CaseFold, NormalizeUTF8, \
|
|
|
|
|
RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer, PythonTokenizer
|
|
|
|
|
RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer
|
|
|
|
|
|
|
|
|
|
# Export the tokenizers/transforms imported above for non-Windows platforms.
# Use extend(), not append(): append() would insert the whole list as a single
# nested element of __all__, so none of these names would be exported and
# `from <package> import *` would fail because __all__ entries must be strings.
__all__.extend(["UnicodeScriptTokenizer", "WhitespaceTokenizer", "CaseFold", "NormalizeUTF8",
                "RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer"])
|
|
|
|
|