// Copyright 2005-2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the 'License'); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an 'AS IS' BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // FST utility inline definitions. #ifndef FST_UTIL_H_ #define FST_UTIL_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // Utility for error handling. DECLARE_bool(fst_error_fatal); #define FSTERROR() \ (FST_FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR)) namespace fst { // Utility for type I/O. For portability of serialized objects across // architectures, care must be taken so that only fixed-size types (like // `int32_t`) are used with `WriteType`/`ReadType`, not types that may differ in // size depending on the architecture, such as `int`. For `enum` types, a // fixed-size base (like `enum E : int32_t`) should be used. Objects are // written and read in the host byte order, so will not be portable across // different endiannesses. namespace internal { // Whether the scalar type is supported by `ReadType`/`WriteType`. template inline constexpr bool IsScalarIOTypeV = std::is_arithmetic_v || std::is_enum_v; } // namespace internal // Reads types from an input stream. // Generic case. template , T> * = nullptr> inline std::istream &ReadType(std::istream &strm, T *t) { return t->Read(strm); } // Numeric (boolean, integral, floating-point) or enum case. template , T> * = nullptr> inline std::istream &ReadType(std::istream &strm, T *t) { return strm.read(reinterpret_cast(t), sizeof(T)); } // Numeric (boolean, integral, floating-point) or enum case only. template inline std::istream &ReadType(std::istream &strm, size_t n, T *t) { static_assert(internal::IsScalarIOTypeV, "Type not supported for batch read."); return strm.read(reinterpret_cast(t), sizeof(T) * n); } // String case. inline std::istream &ReadType(std::istream &strm, std::string *s) { s->clear(); int32_t ns = 0; ReadType(strm, &ns); if (ns <= 0) return strm; s->resize(ns); ReadType(strm, ns, s->data()); return strm; } // Declares types that can be read from an input stream. template std::istream &ReadType(std::istream &strm, std::vector *c); template std::istream &ReadType(std::istream &strm, std::list *c); template std::istream &ReadType(std::istream &strm, std::set *c); template std::istream &ReadType(std::istream &strm, std::map *c); template std::istream &ReadType(std::istream &strm, std::unordered_map *c); template std::istream &ReadType(std::istream &strm, std::unordered_set *c); // Pair case. template inline std::istream &ReadType(std::istream &strm, std::pair *p) { ReadType(strm, &p->first); ReadType(strm, &p->second); return strm; } template inline std::istream &ReadType(std::istream &strm, std::pair *p) { ReadType(strm, const_cast(&p->first)); ReadType(strm, &p->second); return strm; } namespace internal { template std::istream &ReadContainerType(std::istream &strm, C *c, ReserveFn reserve) { c->clear(); int64_t n = 0; ReadType(strm, &n); reserve(c, n); auto insert = std::inserter(*c, c->begin()); for (int64_t i = 0; i < n; ++i) { typename C::value_type value; ReadType(strm, &value); *insert = value; } return strm; } // Generic vector case. template , T> * = nullptr> inline std::istream &ReadVectorType(std::istream &strm, std::vector *c) { return internal::ReadContainerType( strm, c, [](decltype(c) v, int n) { v->reserve(n); }); } // Vector of numerics (boolean, integral, floating-point, char) or enum case. template < typename T, class A, typename std::enable_if_t, T> * = nullptr> inline std::istream &ReadVectorType(std::istream &strm, std::vector *c) { c->clear(); int64_t n = 0; ReadType(strm, &n); if (n == 0) return strm; c->resize(n); ReadType(strm, n, c->data()); return strm; } } // namespace internal template std::istream &ReadType(std::istream &strm, std::array *c) { if constexpr (internal::IsScalarIOTypeV) { ReadType(strm, c->size(), c->data()); } else { for (auto &v : *c) ReadType(strm, &v); } return strm; } template std::istream &ReadType(std::istream &strm, std::vector *c) { return internal::ReadVectorType(strm, c); } template std::istream &ReadType(std::istream &strm, std::list *c) { return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {}); } template std::istream &ReadType(std::istream &strm, std::set *c) { return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {}); } template std::istream &ReadType(std::istream &strm, std::map *c) { return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {}); } template std::istream &ReadType(std::istream &strm, std::unordered_set *c) { return internal::ReadContainerType( strm, c, [](decltype(c) v, int n) { v->reserve(n); }); } template std::istream &ReadType(std::istream &strm, std::unordered_map *c) { return internal::ReadContainerType( strm, c, [](decltype(c) v, int n) { v->reserve(n); }); } // Writes types to an output stream. // Generic case. template ::value && // `string_view` is handled separately below. !std::is_convertible::value, T>::type * = nullptr> inline std::ostream &WriteType(std::ostream &strm, const T t) { t.Write(strm); return strm; } // Numeric (boolean, integral, floating-point) or enum case. template , T> * = nullptr> inline std::ostream &WriteType(std::ostream &strm, const T t) { return strm.write(reinterpret_cast(&t), sizeof(T)); } // Numeric (boolean, integral, floating-point) or enum case only. template inline std::ostream &WriteType(std::ostream &strm, size_t n, const T *t) { static_assert(internal::IsScalarIOTypeV, "Type not supported for batch write."); return strm.write(reinterpret_cast(t), sizeof(T) * n); } inline std::ostream &WriteType(std::ostream &strm, std::string_view s) { int32_t ns = s.size(); WriteType(strm, ns); return strm.write(s.data(), ns); } // Declares types that can be written to an output stream. template std::ostream &WriteType(std::ostream &strm, const std::vector &c); template std::ostream &WriteType(std::ostream &strm, const std::list &c); template std::ostream &WriteType(std::ostream &strm, const std::set &c); template std::ostream &WriteType(std::ostream &strm, const std::map &c); template std::ostream &WriteType(std::ostream &strm, const std::unordered_map &c); template std::ostream &WriteType(std::ostream &strm, const std::unordered_set &c); // Pair case. template inline std::ostream &WriteType(std::ostream &strm, const std::pair &p) { WriteType(strm, p.first); WriteType(strm, p.second); return strm; } namespace internal { template std::ostream &WriteSequence(std::ostream &strm, const C &c) { for (const auto &e : c) { WriteType(strm, e); } return strm; } template std::ostream &WriteContainer(std::ostream &strm, const C &c) { const int64_t n = c.size(); WriteType(strm, n); WriteSequence(strm, c); return strm; } } // namespace internal template std::ostream &WriteType(std::ostream &strm, const std::array &c) { return internal::WriteSequence(strm, c); } template std::ostream &WriteType(std::ostream &strm, const std::vector &c) { return internal::WriteContainer(strm, c); } template std::ostream &WriteType(std::ostream &strm, const std::list &c) { return internal::WriteContainer(strm, c); } template std::ostream &WriteType(std::ostream &strm, const std::set &c) { return internal::WriteContainer(strm, c); } template std::ostream &WriteType(std::ostream &strm, const std::map &c) { return internal::WriteContainer(strm, c); } template std::ostream &WriteType(std::ostream &strm, const std::unordered_map &c) { return internal::WriteContainer(strm, c); } template std::ostream &WriteType(std::ostream &strm, const std::unordered_set &c) { return internal::WriteContainer(strm, c); } // Utilities for converting between int64_t or Weight and string. // Parses a 64-bit signed integer in some base out of an input string. The // string should consist only of digits (no prefixes such as "0x") and an // optionally preceding minus. Returns a value iff the entirety of the string is // consumed during integer parsing, otherwise returns `std::nullopt`. std::optional ParseInt64(std::string_view s, int base = 10); int64_t StrToInt64(std::string_view s, std::string_view source, size_t nline, bool * error = nullptr); template Weight StrToWeight(std::string_view s) { Weight w; std::istringstream strm(std::string{s}); strm >> w; if (!strm) { FSTERROR() << "StrToWeight: Bad weight: " << s; return Weight::NoWeight(); } return w; } template std::string WeightToStr(Weight w) { std::ostringstream strm; strm.precision(9); strm << w; return strm.str(); } // Utilities for reading/writing integer pairs (typically labels). template bool ReadIntPairs(std::string_view source, std::vector> *pairs) { std::ifstream strm(std::string(source), std::ios_base::in); if (!strm) { LOG(ERROR) << "ReadIntPairs: Can't open file: " << source; return false; } const int kLineLen = 8096; char line[kLineLen]; size_t nline = 0; pairs->clear(); while (strm.getline(line, kLineLen)) { ++nline; std::vector col = StrSplit(line, ByAnyChar("\n\t "), SkipEmpty()); // empty line or comment? if (col.empty() || col[0].empty() || col[0][0] == '#') continue; if (col.size() != 2) { LOG(ERROR) << "ReadIntPairs: Bad number of columns, " << "file = " << source << ", line = " << nline; return false; } bool err; I i1 = StrToInt64(col[0], source, nline, &err); if (err) return false; I i2 = StrToInt64(col[1], source, nline, &err); if (err) return false; pairs->emplace_back(i1, i2); } return true; } template bool WriteIntPairs(std::string_view source, const std::vector> &pairs) { std::ofstream fstrm; if (!source.empty()) { fstrm.open(std::string(source)); if (!fstrm) { LOG(ERROR) << "WriteIntPairs: Can't open file: " << source; return false; } } std::ostream &ostrm = fstrm.is_open() ? fstrm : std::cout; for (const auto &pair : pairs) { ostrm << pair.first << "\t" << pair.second << "\n"; } return !!ostrm; } // Utilities for reading/writing label pairs. template bool ReadLabelPairs(std::string_view source, std::vector> *pairs) { return ReadIntPairs(source, pairs); } template bool WriteLabelPairs(std::string_view source, const std::vector> &pairs) { return WriteIntPairs(source, pairs); } // Utilities for converting a type name to a legal C symbol. void ConvertToLegalCSymbol(std::string *s); // Utilities for stream I/O. bool AlignInput(std::istream &strm, size_t align = MappedFile::kArchAlignment); bool AlignOutput(std::ostream &strm, size_t align = MappedFile::kArchAlignment); // An associative container for which testing membership is faster than an STL // set if members are restricted to an interval that excludes most non-members. // A Key must have ==, !=, and < operators defined. Element NoKey should be a // key that marks an uninitialized key and is otherwise unused. Find() returns // an STL const_iterator to the match found, otherwise it equals End(). template class CompactSet { public: using const_iterator = typename std::set::const_iterator; CompactSet() : min_key_(NoKey), max_key_(NoKey) {} CompactSet(const CompactSet &) = default; void Insert(Key key) { set_.insert(key); if (min_key_ == NoKey || key < min_key_) min_key_ = key; if (max_key_ == NoKey || max_key_ < key) max_key_ = key; } void Erase(Key key) { set_.erase(key); if (set_.empty()) { min_key_ = max_key_ = NoKey; } else if (key == min_key_) { ++min_key_; } else if (key == max_key_) { --max_key_; } } void Clear() { set_.clear(); min_key_ = max_key_ = NoKey; } const_iterator Find(Key key) const { if (min_key_ == NoKey || key < min_key_ || max_key_ < key) { return set_.end(); } else { return set_.find(key); } } bool Member(Key key) const { if (min_key_ == NoKey || key < min_key_ || max_key_ < key) { return false; // out of range } else if (min_key_ != NoKey && max_key_ + 1 == min_key_ + set_.size()) { return true; // dense range } else { return set_.count(key); } } const_iterator Begin() const { return set_.begin(); } const_iterator End() const { return set_.end(); } // All stored keys are greater than or equal to this value. Key LowerBound() const { return min_key_; } // All stored keys are less than or equal to this value. Key UpperBound() const { return max_key_; } private: std::set set_; Key min_key_; Key max_key_; void operator=(const CompactSet &) = delete; }; } // namespace fst #endif // FST_UTIL_H_