You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

322 lines
8.7 KiB

// Copyright 2005-2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the 'License');
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an 'AS IS' BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// See www.openfst.org for extensive documentation on this weighted
// finite-state transducer library.
#ifndef FST_COMPAT_H_
#define FST_COMPAT_H_
#include <algorithm>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <iterator>
#include <memory>
#include <numeric>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
// OPENFST_DEPRECATED(message) marks the declaration it is attached to as
// deprecated, with `message` as the diagnostic text. It expands to the
// GNU-style `__attribute__` on GCC/Clang, to the standard `[[deprecated]]`
// attribute on MSVC, and to nothing on any other compiler.
#if defined(__GNUC__) || defined(__clang__)
#define OPENFST_DEPRECATED(message) __attribute__((deprecated(message)))
#elif defined(_MSC_VER)
#define OPENFST_DEPRECATED(message) [[deprecated(message)]]
#else
#define OPENFST_DEPRECATED(message)
#endif
namespace fst {
// Downcasting.
//
// Both overloads are thin wrappers around `static_cast`: no run-time check is
// performed, so the caller is responsible for knowing that the object really
// has the requested type. `To` must be given explicitly; `From` is deduced.

// Pointer form: converts `f` to the pointer type `To`.
template <class To, class From>
inline auto down_cast(From *f) -> To {
  return static_cast<To>(f);
}

// Reference form: converts `f` to the (reference) type `To`.
template <class To, class From>
inline auto down_cast(From &f) -> To {
  return static_cast<To>(f);
}
// Bitcasting.
//
// Returns a `Dest` whose object representation is a byte-for-byte copy of
// `source`. `Dest` must be given explicitly; `Source` is deduced.
//
// Both types must have the same size and be trivially copyable (copying the
// bytes of any other kind of object is undefined behaviour), and `Dest` must
// be default-constructible. Violations of the first two requirements are
// rejected at compile time.
template <class Dest, class Source>
inline Dest bit_cast(const Source &source) {
  static_assert(sizeof(Dest) == sizeof(Source),
                "Bitcasting unsafe for specified types");
  static_assert(std::is_trivially_copyable_v<Dest> &&
                    std::is_trivially_copyable_v<Source>,
                "Bitcasting requires trivially copyable types");
  Dest dest;
  // <cstring> only guarantees the std-qualified name.
  std::memcpy(&dest, &source, sizeof(dest));
  return dest;
}
// Reads a `T` from the `sizeof(T)` bytes starting at `p` by copying them into
// a local object. Because the bytes are copied rather than accessed through a
// `T *`, `p` does not have to be suitably aligned for `T`.
//
// `T` must be trivially copyable (enforced at compile time) and
// default-constructible; `p` must point to at least `sizeof(T)` readable
// bytes.
template <typename T>
T UnalignedLoad(const void *p) {
  static_assert(std::is_trivially_copyable_v<T>,
                "UnalignedLoad requires a trivially copyable type");
  T t;
  // <cstring> only guarantees the std-qualified name.
  std::memcpy(&t, p, sizeof t);
  return t;
}
namespace internal {

// Identity metafunction: the nested `type` is exactly `T`.
// TODO(kbg): Remove this once we migrate to C++20.
template <class T>
struct type_identity {
  using type = T;
};

// Shorthand for `type_identity<T>::type`.
template <class T>
using type_identity_t = typename type_identity<T>::type;

}  // namespace internal
// Returns its argument converted to `To`. The conversion happens when the
// argument is bound to the parameter, so only conversions that are allowed
// implicitly are accepted. The parameter type is spelled through
// `type_identity_t`, a nested type that template argument deduction does not
// look through, so `To` must always be written explicitly by the caller.
template <class To>
constexpr To implicit_cast(typename internal::type_identity_t<To> to) {
  return to;
}
// Checksums.
//
// Maintains a checksum string that is fed through `Update()` and read back
// with `Digest()`. The constructor, `Reset()` and `Update()` are only declared
// here; their definitions (and thus the exact checksum algorithm and the use
// of `count_` and `kCheckSumLength`) are not visible in this header.
//
// Instances can be neither copied nor assigned.
class CheckSummer {
 public:
  CheckSummer();

  CheckSummer(const CheckSummer &) = delete;
  CheckSummer &operator=(const CheckSummer &) = delete;

  void Reset();

  void Update(std::string_view data);

  // Returns a copy of the current checksum string. Does not modify the
  // object, so it may be called on a const CheckSummer.
  std::string Digest() const { return check_sum_; }

 private:
  static constexpr int kCheckSumLength = 32;
  int count_;
  std::string check_sum_;
};
// Defines make_unique_for_overwrite using a standard definition that should be
// compatible with the C++20 definition. That is, all compiling uses of
// `std::make_unique_for_overwrite` should have the same result with
// `fst::make_unique_for_overwrite`.
//
// As in C++20, the zero-argument form is for non-array types and the
// `size_t` form is for arrays of unknown bound (`T[]`); any other combination
// is rejected at compile time. In particular, calling the `size_t` form with
// a non-array `T` would otherwise pair `new[]` with a scalar `delete`.
// TODO(kbg): Remove these once we migrate to C++20.

// Allocates a single default-initialized `T` (its value is indeterminate for
// trivial types, hence "for overwrite").
template <typename T>
std::unique_ptr<T> make_unique_for_overwrite() {
  static_assert(!std::is_array_v<T>,
                "make_unique_for_overwrite<T>() requires a non-array type; "
                "use make_unique_for_overwrite<T[]>(n) for arrays");
  return std::unique_ptr<T>(new T);
}

// Allocates an array of `n` default-initialized elements; `T` must be an
// array of unknown bound such as `int[]`.
template <typename T>
std::unique_ptr<T> make_unique_for_overwrite(size_t n) {
  static_assert(std::is_array_v<T> && std::extent_v<T> == 0,
                "make_unique_for_overwrite<T>(n) requires an array of "
                "unknown bound, e.g. make_unique_for_overwrite<int[]>(n)");
  return std::unique_ptr<T>(new std::remove_extent_t<T>[n]);
}
// Hands ownership of the raw pointer `ptr` to a `std::unique_ptr<T>`, with
// `T` deduced from the argument. `ptr` may be null, in which case the
// returned pointer is empty.
template <class T>
std::unique_ptr<T> WrapUnique(T *ptr) {
  std::unique_ptr<T> owner(ptr);
  return owner;
}
// Range utilities

// Bundles a [begin, end) iterator pair so that it can be used wherever a
// range is expected, e.g. in a range-based for loop. It stores copies of the
// two iterators and nothing else; it neither owns nor inspects the underlying
// sequence.
template <typename IteratorT>
class iterator_range {
 public:
  using value_type = typename std::iterator_traits<IteratorT>::value_type;
  using iterator = IteratorT;
  using const_iterator = IteratorT;

  // Both ends are value-initialized.
  iterator_range() : first_(), last_() {}

  iterator_range(IteratorT begin_iterator, IteratorT end_iterator)
      : first_(std::move(begin_iterator)), last_(std::move(end_iterator)) {}

  // Accessors return copies of the stored iterators.
  IteratorT begin() const { return first_; }
  IteratorT end() const { return last_; }

 private:
  IteratorT first_;
  IteratorT last_;
};
// Builds an `iterator_range` from two iterators, deducing the iterator type
// from the arguments (in the spirit of std::make_pair()). Handy for writing
//
//   for (const auto &x : make_range(first, last)) { ... }
//
// over a sub-range.
template <typename T>
iterator_range<T> make_range(T x, T y) {
  iterator_range<T> range(std::move(x), std::move(y));
  return range;
}
// String munging.
namespace internal {

// Computes the size of the string obtained by joining `elements` with a
// delimiter of length `s_size`: the sum of all element sizes plus one
// delimiter between each pair of neighbours. Returns 0 for an empty vector.
//
// `S` only needs a `size()` member.
template <class S>
size_t GetResultSize(const std::vector<S> &elements, size_t s_size) {
  // Without this guard `(size() - 1) * s_size` would wrap around.
  if (elements.empty()) return 0;
  const auto add_size = [](size_t partial, const S &right) {
    return partial + right.size();
  };
  // The seed must be a size_t: std::accumulate keeps its running total in the
  // type of the initial value, so an `int` seed would truncate large sums.
  const size_t payload =
      std::accumulate(elements.begin(), elements.end(), size_t{0}, add_size);
  return payload + (elements.size() - 1) * s_size;
}

}  // namespace internal
// Concatenates `elements` into one string, inserting `delim` between each
// pair of adjacent elements (never before the first or after the last).
// Returns an empty string for an empty vector. `S` must provide `data()` and
// `size()`.
template <class S>
std::string StringJoin(const std::vector<S> &elements, std::string_view delim) {
  std::string joined;
  if (!elements.empty()) {
    // Reserve the final size up front so the appends below do not reallocate.
    joined.reserve(internal::GetResultSize(elements, delim.size()));
    bool is_first = true;
    for (const S &piece : elements) {
      if (!is_first) joined.append(delim.data(), delim.size());
      joined.append(piece.data(), piece.size());
      is_first = false;
    }
  }
  return joined;
}
// Single-character convenience overload: views `delim` as a one-character
// string and forwards to the `std::string_view` overload.
template <class S>
std::string StringJoin(const std::vector<S> &elements, char delim) {
  return StringJoin(elements, std::string_view(&delim, 1));
}
// Empty tag type. It carries no data and is only used to select the
// `StrSplit` overloads in this header that take a trailing `SkipEmpty`
// argument.
struct SkipEmpty {};
// Delimiter argument for `StrSplit`: owns a copy of the characters in `sp`,
// exposed as the public `delimiters` string. Construction is explicit, so a
// plain string is never silently treated as a delimiter set.
// NOTE(review): judging by the name, each character is an individual
// delimiter; the splitting logic itself is not visible here.
struct ByAnyChar {
  explicit ByAnyChar(std::string_view sp) : delimiters(sp) {}

  std::string delimiters;
};
namespace internal {

// Result object of the `StrSplit` functions. The constructor immediately
// calls `SplitToSv()` (defined out of line) and stores the resulting pieces
// as `std::string_view`s. The pieces can then be
//   * iterated via begin()/end(),
//   * moved out as a `std::vector<std::string_view>` (rvalue splitter only),
//   * copied out as a `std::vector<std::string>`.
// Only a view of the input text is stored, not a copy of it.
class StringSplitter {
 public:
  using value_type = std::string_view;
  using const_iterator = std::vector<std::string_view>::const_iterator;

  // `vec_` is declared last, so the other members are already initialized
  // when `SplitToSv()` runs.
  StringSplitter(std::string_view string, std::string delim,
                 bool skip_empty = false)
      : string_(string),
        delim_(std::move(delim)),
        skip_empty_(skip_empty),
        vec_(SplitToSv()) {}

  // Gives up the stored views; callable on rvalues only.
  operator  // NOLINT(google-explicit-constructor)
      std::vector<std::string_view>() && {
    return std::move(vec_);
  }

  // Copies every piece into an owning string.
  operator  // NOLINT(google-explicit-constructor)
      std::vector<std::string>() {
    return std::vector<std::string>(vec_.begin(), vec_.end());
  }

  const_iterator begin() const { return vec_.cbegin(); }
  const_iterator end() const { return vec_.cend(); }

 private:
  std::vector<std::string_view> SplitToSv();

  std::string_view string_;
  std::string delim_;
  bool skip_empty_;
  std::vector<std::string_view> vec_;
};

}  // namespace internal
// `StrSplit` replacements. Only support splitting on `char` or
// `ByAnyChar` (notably not on a multi-char string delimiter), and with or
// without `SkipEmpty`.
//
// Every overload takes the text to split as `full` and returns an
// `internal::StringSplitter`, which can be iterated or converted to a vector
// of strings / string views. These are declarations only; the definitions
// are not part of this header.
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim);
internal::StringSplitter StrSplit(std::string_view full, char delim);
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim,
SkipEmpty);
internal::StringSplitter StrSplit(std::string_view full, char delim, SkipEmpty);
// Trailing-whitespace stripping in two flavours: one taking a pointer to a
// mutable string and returning nothing, one taking and returning a
// `std::string_view`. Declarations only.
// NOTE(review): presumably the pointer form edits `*full` in place and the
// view form returns a shortened view of its argument — confirm against the
// definitions.
void StripTrailingAsciiWhitespace(std::string *full);
std::string_view StripTrailingAsciiWhitespace(std::string_view full);
// Argument adapter for `StrCat`: implicitly constructible from anything
// convertible to `std::string_view` or from an `int`, and stores the textual
// form of whichever was given. The constructors are intentionally
// non-explicit so that callers can pass strings and ints directly.
class StringOrInt {
 public:
  // String-like input: stored as a copy of its characters.
  template <typename T, typename = std::enable_if_t<
                            std::is_convertible_v<T, std::string_view>>>
  StringOrInt(T s) : str_(std::string(s)) {}  // NOLINT

  // Integer input: stored in the decimal form produced by std::to_string.
  StringOrInt(int i) : str_(std::to_string(i)) {}  // NOLINT

  // The stored text.
  const std::string &Get() const { return str_; }

 private:
  std::string str_;
};
// Concatenates the textual forms of two to five arguments, in order. Each
// argument is taken as a `StringOrInt`, so strings, string views and ints can
// be mixed freely. Every overload beyond the two-argument one prepends its
// first argument to the result of the next-smaller overload.
// TODO(kbg): Make this work with variadic template, maybe.
inline std::string StrCat(const StringOrInt &s1, const StringOrInt &s2) {
  std::string joined = s1.Get();
  joined += s2.Get();
  return joined;
}

inline std::string StrCat(const StringOrInt &s1, const StringOrInt &s2,
                          const StringOrInt &s3) {
  std::string joined = s1.Get();
  joined += StrCat(s2, s3);
  return joined;
}

inline std::string StrCat(const StringOrInt &s1, const StringOrInt &s2,
                          const StringOrInt &s3, const StringOrInt &s4) {
  std::string joined = s1.Get();
  joined += StrCat(s2, s3, s4);
  return joined;
}

inline std::string StrCat(const StringOrInt &s1, const StringOrInt &s2,
                          const StringOrInt &s3, const StringOrInt &s4,
                          const StringOrInt &s5) {
  std::string joined = s1.Get();
  joined += StrCat(s2, s3, s4, s5);
  return joined;
}
// Returns true iff `text` begins with `prefix`. An empty `prefix` matches
// every `text`, including an empty one.
// TODO(agutkin): Remove this once we migrate to C++20, where `starts_with`
// is available.
inline bool StartsWith(std::string_view text, std::string_view prefix) {
  // A prefix longer than the text can never match.
  if (prefix.size() > text.size()) return false;
  return text.substr(0, prefix.size()) == prefix;
}
// If `*s` starts with `expected`, removes that prefix from `*s` and returns
// true. Otherwise leaves `*s` unchanged and returns false. `s` must not be
// null.
inline bool ConsumePrefix(std::string_view *s, std::string_view expected) {
  const bool matched = StartsWith(*s, expected);
  if (matched) s->remove_prefix(expected.size());
  return matched;
}
} // namespace fst
#endif // FST_COMPAT_H_