|
|
|
@ -87,8 +87,8 @@ std::shared_ptr<JiebaTokenizerOperation> JiebaTokenizer(const std::string &hmm_p
|
|
|
|
|
return op->ValidateParams() ? op : nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::shared_ptr<LookupOperation> Lookup(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token,
|
|
|
|
|
const std::string &data_type) {
|
|
|
|
|
std::shared_ptr<LookupOperation> Lookup(const std::shared_ptr<Vocab> &vocab,
|
|
|
|
|
const std::optional<std::string> &unknown_token, const std::string &data_type) {
|
|
|
|
|
auto op = std::make_shared<LookupOperation>(vocab, unknown_token, data_type);
|
|
|
|
|
|
|
|
|
|
return op->ValidateParams() ? op : nullptr;
|
|
|
|
@ -340,7 +340,7 @@ Status JiebaTokenizerOperation::AddWord(const std::string &word, int64_t freq) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// LookupOperation
|
|
|
|
|
LookupOperation::LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token,
|
|
|
|
|
// Constructs a LookupOperation by storing the given vocabulary handle, the
// optional unknown token and the data type string. default_id_ is initialised
// to Vocab::kNoTokenExists; no vocabulary lookup happens in the constructor.
LookupOperation::LookupOperation(const std::shared_ptr<Vocab> &vocab,
                                 const std::optional<std::string> &unknown_token,
                                 const std::string &data_type)
    : vocab_(vocab),
      unknown_token_(unknown_token),
      default_id_(Vocab::kNoTokenExists),
      data_type_(data_type) {}
|
|
|
|
|
|
|
|
|
@ -352,10 +352,10 @@ Status LookupOperation::ValidateParams() {
|
|
|
|
|
MS_LOG(ERROR) << err_msg;
|
|
|
|
|
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
|
|
|
|
}
|
|
|
|
|
if (!unknown_token_.empty()) {
|
|
|
|
|
default_id_ = vocab_->Lookup(unknown_token_);
|
|
|
|
|
if (unknown_token_ != std::nullopt) {
|
|
|
|
|
default_id_ = vocab_->Lookup(*unknown_token_);
|
|
|
|
|
if (default_id_ == Vocab::kNoTokenExists) {
|
|
|
|
|
std::string err_msg = "Lookup: \"" + unknown_token_ + "\" doesn't exist in vocab.";
|
|
|
|
|
std::string err_msg = "Lookup: \"" + *unknown_token_ + "\" doesn't exist in vocab.";
|
|
|
|
|
MS_LOG(ERROR) << err_msg;
|
|
|
|
|
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
|
|
|
|
}
|
|
|
|
|