|
|
|
@ -119,10 +119,12 @@ class JiebaTokenizer(cde.JiebaTokenizerOp):
|
|
|
|
|
the dictionary can be obtained on the official website of cppjieba.
|
|
|
|
|
mp_path (str): the dictionary file is used by MPSegment algorithm,
|
|
|
|
|
the dictionary can be obtained on the official website of cppjieba.
|
|
|
|
|
mode (JiebaMode, optional): "MP" mode will tokenize with MPSegment algorithm,
|
|
|
|
|
"HMM" mode will tokenize with Hiddel Markov Model Segment algorithm,
|
|
|
|
|
"MIX" model will tokenize with a mix of MPSegment and HMMSegment algorithm
|
|
|
|
|
(default="MIX").
|
|
|
|
|
mode (JiebaMode, optional): Valid values can be any of [JiebaMode.MP, JiebaMode.HMM,
|
|
|
|
|
JiebaMode.MIX] (default=JiebaMode.MIX).
|
|
|
|
|
|
|
|
|
|
- JiebaMode.MP, tokenize with MPSegment algorithm.
|
|
|
|
|
- JiebaMode.HMM, tokenize with Hidden Markov Model Segment algorithm.
|
|
|
|
|
- JiebaMode.MIX, tokenize with a mix of MPSegment and HMMSegment algorithm.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
@check_jieba_init
|
|
|
|
@ -287,10 +289,16 @@ if platform.system().lower() != 'windows':
|
|
|
|
|
Apply normalize operation on utf-8 string tensor.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
normalize_form (NormalizeForm, optional): Valid values are "NONE", "NFC", "NFKC", "NFD", "NFKD".
|
|
|
|
|
If set to "NONE", will do nothing for input string tensor.
|
|
|
|
|
If set to any of "NFC", "NFKC", "NFD", "NFKD", will apply normalize operation (default="NFKC").
|
|
|
|
|
See http://unicode.org/reports/tr15/ for details.
|
|
|
|
|
normalize_form (NormalizeForm, optional): Valid values can be any of [NormalizeForm.NONE,
|
|
|
|
|
NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD,
|
|
|
|
|
NormalizeForm.NFKD] (default=NormalizeForm.NFKC).
|
|
|
|
|
See http://unicode.org/reports/tr15/ for details.
|
|
|
|
|
|
|
|
|
|
- NormalizeForm.NONE, do nothing for input string tensor.
|
|
|
|
|
- NormalizeForm.NFC, normalize with Normalization Form C.
|
|
|
|
|
- NormalizeForm.NFKC, normalize with Normalization Form KC.
|
|
|
|
|
- NormalizeForm.NFD, normalize with Normalization Form D.
|
|
|
|
|
- NormalizeForm.NFKD, normalize with Normalization Form KD.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
def __init__(self, normalize_form=NormalizeForm.NFKC):
|
|
|
|
|