|
|
|
@ -60,6 +60,7 @@ class BasicTokenizer : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~BasicTokenizer() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -104,6 +105,7 @@ class BertTokenizer : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~BertTokenizer() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -123,6 +125,7 @@ class CaseFold : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~CaseFold() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -153,12 +156,13 @@ class JiebaTokenizer : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~JiebaTokenizer() = default;
|
|
|
|
|
|
|
|
|
|
/// \brief Add a word to this JiebaTokenizer.
///     NOTE(review): behavior inferred from the name only; the implementation is not visible here - confirm.
/// \param[in] word The word, passed by const reference.
/// \param[in] freq Value associated with the word; defaults to 0.
///     NOTE(review): presumably a word frequency; the meaning of the default 0 is not visible here - confirm.
/// \return Status object. NOTE(review): presumably reports success or failure of the call - confirm.
Status AddWord(const std::string &word, int64_t freq = 0);
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
|
|
|
|
|
|
Status AddWord(const std::string &word, int64_t freq = 0);
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
struct Data;
|
|
|
|
|
std::shared_ptr<Data> data_;
|
|
|
|
@ -183,6 +187,7 @@ class Lookup : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~Lookup() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -214,6 +219,7 @@ class Ngram : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~Ngram() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -242,6 +248,7 @@ class NormalizeUTF8 : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~NormalizeUTF8() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -267,6 +274,7 @@ class RegexReplace : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~RegexReplace() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -294,6 +302,7 @@ class RegexTokenizer : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~RegexTokenizer() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -324,6 +333,7 @@ class SentencePieceTokenizer : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~SentencePieceTokenizer() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -346,6 +356,7 @@ class SlidingWindow : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~SlidingWindow() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -371,6 +382,7 @@ class ToNumber : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~ToNumber() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -390,6 +402,7 @@ class TruncateSequencePair : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~TruncateSequencePair() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -409,6 +422,7 @@ class UnicodeCharTokenizer : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~UnicodeCharTokenizer() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -430,6 +444,7 @@ class UnicodeScriptTokenizer : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~UnicodeScriptTokenizer() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
@ -449,6 +464,7 @@ class WhitespaceTokenizer : public TensorTransform {
|
|
|
|
|
/// \brief Destructor
|
|
|
|
|
~WhitespaceTokenizer() = default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
|
|
|
|
/// \return Shared pointer to TensorOperation object.
|
|
|
|
|
std::shared_ptr<TensorOperation> Parse() override;
|
|
|
|
|