You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
152 lines
4.4 KiB
152 lines
4.4 KiB
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
/*
|
|
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
#include "paddle/string/piece.h"
|
|
|
|
#include <string.h>
|
|
|
|
#include <algorithm>
|
|
#include <iosfwd>
|
|
#include <stdexcept>
|
|
|
|
namespace paddle {
|
|
namespace string {
|
|
|
|
// Default constructor: an empty Piece with a null data pointer and length 0.
Piece::Piece()
    : data_(nullptr),
      size_(0) {}
|
|
|
|
// Builds a Piece over the `n` bytes starting at `d`; only the pointer and the
// length are stored.
//
// Throws std::invalid_argument when `d` is null while `n` is non-zero.
Piece::Piece(const char* d, size_t n) : data_(d), size_(n) {
  const bool null_with_length = (d == nullptr) && (n != 0);
  if (null_with_length) {
    throw std::invalid_argument("Piece requires len to be 0 for NULL data");
  }
}
|
|
|
|
// Builds a Piece over a NUL-terminated C string. The length is taken from
// strlen(s); a null `s` produces a zero-length Piece.
Piece::Piece(const char* s) : data_(s) {
  if (s == nullptr) {
    size_ = 0;
  } else {
    size_ = strlen(s);
  }
}
|
|
|
|
// Builds a Piece over the contents of `s`. Only s.data() and s.size() are
// stored; the characters are not copied here.
Piece::Piece(const std::string& s)
    : data_(s.data()),
      size_(s.size()) {}
|
|
|
|
// Bounds-checked element access.
//
// Returns the character at index `n`.
// Throws std::invalid_argument when `n` is not less than len().
char Piece::operator[](size_t n) const {
  if (n < len()) {
    return data_[n];
  }
  throw std::invalid_argument("index out of Piece length");
}
|
|
|
|
// Three-way comparison of two Pieces.
//
// The common prefix (the first min(a.len(), b.len()) bytes) is compared with
// memcmp; if it is equal, the shorter Piece orders first.
//
// Returns a negative value if `a` orders before `b`, a positive value if it
// orders after, and 0 if both have the same length and bytes.
int Compare(Piece a, Piece b) {
  const size_t min_len = (a.len() < b.len()) ? a.len() : b.len();

  // An empty Piece may carry a null data pointer, and handing a null pointer
  // to memcmp is undefined behaviour even for a zero count, so the call is
  // skipped entirely when there is no common prefix to compare.
  const int r = (min_len == 0) ? 0 : memcmp(a.data(), b.data(), min_len);
  if (r != 0) {
    return r;
  }

  // Common prefix is identical: order by length.
  if (a.len() < b.len()) {
    return -1;
  }
  if (a.len() > b.len()) {
    return 1;
  }
  return 0;
}
|
|
|
|
// Equality: two Pieces are equal when they have the same length and the same
// bytes.
bool operator==(Piece x, Piece y) {
  if (x.len() != y.len()) {
    return false;
  }
  // Two empty Pieces are equal regardless of their data pointers. Checking
  // this first also keeps a possibly-null pointer away from memcmp, which is
  // undefined behaviour even for a zero count.
  if (x.len() == 0 || x.data() == y.data()) {
    return true;
  }
  return memcmp(x.data(), y.data(), x.len()) == 0;
}
|
|
|
|
// Inequality, defined as the negation of operator==.
bool operator!=(Piece x, Piece y) {
  const bool same = (x == y);
  return !same;
}
|
|
|
|
// True when Compare(x, y) reports that `x` orders strictly before `y`.
bool operator<(Piece x, Piece y) {
  const int order = Compare(x, y);
  return order < 0;
}
|
|
// True when Compare(x, y) reports that `x` orders strictly after `y`.
bool operator>(Piece x, Piece y) {
  const int order = Compare(x, y);
  return order > 0;
}
|
|
|
|
// True when Compare(x, y) reports that `x` orders before or equal to `y`.
bool operator<=(Piece x, Piece y) {
  const int order = Compare(x, y);
  return order <= 0;
}
|
|
// True when Compare(x, y) reports that `x` orders after or equal to `y`.
bool operator>=(Piece x, Piece y) {
  const int order = Compare(x, y);
  return order >= 0;
}
|
|
|
|
// Reports whether `s` begins with the bytes of `x`.
//
// An empty `x` is a prefix of every Piece, including an empty one.
bool HasPrefix(Piece s, Piece x) {
  if (x.len() > s.len()) {
    return false;
  }
  // Answer the empty-prefix case directly: an empty Piece may hold a null
  // data pointer, and passing one to memcmp is undefined behaviour even when
  // the count is zero.
  if (x.len() == 0) {
    return true;
  }
  return memcmp(s.data(), x.data(), x.len()) == 0;
}
|
|
|
|
// Reports whether `s` ends with the bytes of `x`.
//
// An empty `x` is a suffix of every Piece, including an empty one.
bool HasSuffix(Piece s, Piece x) {
  if (x.len() > s.len()) {
    return false;
  }
  // Answer the empty-suffix case directly: an empty Piece may hold a null
  // data pointer, and passing one to memcmp is undefined behaviour even when
  // the count is zero.
  if (x.len() == 0) {
    return true;
  }
  const char* tail = s.data() + (s.len() - x.len());
  return memcmp(tail, x.data(), x.len()) == 0;
}
|
|
|
|
// Returns a Piece over `s` with its first `n` bytes dropped.
//
// Throws std::invalid_argument when `n` exceeds s.len().
Piece SkipPrefix(Piece s, size_t n) {
  if (s.len() < n) {
    throw std::invalid_argument("Skip distance larger than Piece length");
  }
  const size_t remaining = s.len() - n;
  return Piece(s.data() + n, remaining);
}
|
|
|
|
// Returns a Piece over `s` with its last `n` bytes dropped.
//
// Throws std::invalid_argument when `n` exceeds s.len().
Piece SkipSuffix(Piece s, size_t n) {
  if (s.len() < n) {
    throw std::invalid_argument("Skip distance larger than Piece length");
  }
  const size_t remaining = s.len() - n;
  return Piece(s.data(), remaining);
}
|
|
|
|
// Returns `s` with the leading `x` removed when HasPrefix(s, x) holds;
// otherwise returns `s` unchanged.
Piece TrimPrefix(Piece s, Piece x) {
  if (!HasPrefix(s, x)) {
    return s;
  }
  return SkipPrefix(s, x.len());
}
|
|
|
|
// Returns `s` with the trailing `x` removed when HasSuffix(s, x) holds;
// otherwise returns `s` unchanged.
Piece TrimSuffix(Piece s, Piece x) {
  if (!HasSuffix(s, x)) {
    return s;
  }
  return SkipSuffix(s, x.len());
}
|
|
|
|
// Reports whether `sub` occurs anywhere inside `s`.
//
// An empty `sub` is considered to be contained in every Piece, including an
// empty one.
bool Contains(Piece s, Piece sub) {
  // For an empty pattern std::search returns s.begin(), which equals s.end()
  // when `s` is empty too. Without this guard Contains(empty, empty) would
  // report false while Contains(non_empty, empty) reports true.
  if (sub.len() == 0) {
    return true;
  }
  return std::search(s.begin(), s.end(), sub.begin(), sub.end()) != s.end();
}
|
|
|
|
// Returns the offset of the first occurrence of `sub` inside `s`, or
// Piece::npos when `sub` does not occur.
//
// An empty `sub` is found at offset 0 in every Piece, including an empty one.
size_t Index(Piece s, Piece sub) {
  // For an empty pattern std::search returns s.begin(), which equals s.end()
  // when `s` is empty too. Without this guard Index(empty, empty) would
  // report npos while Index(non_empty, empty) reports 0.
  if (sub.len() == 0) {
    return 0;
  }
  auto hit = std::search(s.begin(), s.end(), sub.begin(), sub.end());
  if (hit == s.end()) {
    return Piece::npos;
  }
  return static_cast<size_t>(hit - s.data());
}
|
|
|
|
// Searches `s` forward for the character `c`, starting at offset `pos`.
//
// Returns the offset (relative to the start of `s`) of the first match, or
// Piece::npos when `pos` is at or past the end of `s` or no match exists.
size_t Find(Piece s, char c, size_t pos) {
  if (pos < s.len()) {
    const void* hit = memchr(s.data() + pos, c, s.len() - pos);
    if (hit != nullptr) {
      return static_cast<const char*>(hit) - s.data();
    }
  }
  return Piece::npos;
}
|
|
|
|
// Searches `s` backward for the character `c`, starting at offset `pos`
// (clamped to the last valid index) and moving toward the beginning.
//
// Returns the offset of the first match encountered, or Piece::npos when `s`
// is empty or no match exists.
size_t RFind(Piece s, char c, size_t pos) {
  if (s.len() == 0) {
    return Piece::npos;
  }
  const char* const base = s.data();
  // Iterate with an index that is always one past the element being checked.
  // Walking a pointer down until it drops below s.data() would form a pointer
  // before the start of the array, which is undefined behaviour.
  for (size_t next = std::min(pos, s.len() - 1) + 1; next > 0; --next) {
    const size_t i = next - 1;
    if (base[i] == c) {
      return i;
    }
  }
  return Piece::npos;
}
|
|
|
|
// Returns a Piece over up to `n` bytes of `s` starting at offset `pos`.
//
// `pos` is clamped to s.len() and `n` is clamped to the number of bytes left
// after `pos`, so out-of-range arguments produce a shorter (possibly empty)
// Piece.
Piece SubStr(Piece s, size_t pos, size_t n) {
  const size_t start = std::min(pos, s.len());
  const size_t count = std::min(n, s.len() - start);
  return Piece(s.data() + start, count);
}
|
|
|
|
std::ostream& operator<<(std::ostream& o, Piece piece) {
|
|
return o << piece.ToString();
|
|
}
|
|
|
|
} // namespace string
|
|
} // namespace paddle
|