You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

322 lines
8.7 KiB

  1. // Copyright 2005-2024 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the 'License');
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an 'AS IS' BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // See www.openfst.org for extensive documentation on this weighted
  16. // finite-state transducer library.
  17. #ifndef FST_COMPAT_H_
  18. #define FST_COMPAT_H_
  19. #include <algorithm>
  20. #include <climits>
  21. #include <cstddef>
  22. #include <cstdint>
  23. #include <cstdlib>
  24. #include <cstring>
  25. #include <iostream>
  26. #include <iterator>
  27. #include <memory>
  28. #include <numeric>
  29. #include <string>
  30. #include <string_view>
  31. #include <type_traits>
  32. #include <utility>
  33. #include <vector>
  34. #if defined(__GNUC__) || defined(__clang__)
  35. #define OPENFST_DEPRECATED(message) __attribute__((deprecated(message)))
  36. #elif defined(_MSC_VER)
  37. #define OPENFST_DEPRECATED(message) [[deprecated(message)]]
  38. #else
  39. #define OPENFST_DEPRECATED(message)
  40. #endif
  41. namespace fst {
  42. // Downcasting.
  43. template <typename To, typename From>
  44. inline To down_cast(From *f) {
  45. return static_cast<To>(f);
  46. }
  47. template <typename To, typename From>
  48. inline To down_cast(From &f) {
  49. return static_cast<To>(f);
  50. }
  51. // Bitcasting.
  52. template <class Dest, class Source>
  53. inline Dest bit_cast(const Source &source) {
  54. static_assert(sizeof(Dest) == sizeof(Source),
  55. "Bitcasting unsafe for specified types");
  56. Dest dest;
  57. memcpy(&dest, &source, sizeof(dest));
  58. return dest;
  59. }
  60. template <typename T>
  61. T UnalignedLoad(const void *p) {
  62. T t;
  63. memcpy(&t, p, sizeof t);
  64. return t;
  65. }
  66. namespace internal {
  67. // TODO(kbg): Remove this once we migrate to C++20.
  68. template <typename T>
  69. struct type_identity {
  70. using type = T;
  71. };
  72. template <typename T>
  73. using type_identity_t = typename type_identity<T>::type;
  74. } // namespace internal
  75. template <typename To>
  76. constexpr To implicit_cast(typename internal::type_identity_t<To> to) {
  77. return to;
  78. }
  79. // Checksums.
  80. class CheckSummer {
  81. public:
  82. CheckSummer();
  83. void Reset();
  84. void Update(std::string_view data);
  85. std::string Digest() { return check_sum_; }
  86. private:
  87. static constexpr int kCheckSumLength = 32;
  88. int count_;
  89. std::string check_sum_;
  90. CheckSummer(const CheckSummer &) = delete;
  91. CheckSummer &operator=(const CheckSummer &) = delete;
  92. };
  93. // Defines make_unique_for_overwrite using a standard definition that should be
  94. // compatible with the C++20 definition. That is, all compiling uses of
  95. // `std::make_unique_for_overwrite` should have the same result with
  96. // `fst::make_unique_for_overwrite`. Note that the reverse doesn't
  97. // necessarily hold.
  98. // TODO(kbg): Remove these once we migrate to C++20.
  99. template <typename T>
  100. std::unique_ptr<T> make_unique_for_overwrite() {
  101. return std::unique_ptr<T>(new T);
  102. }
  103. template <typename T>
  104. std::unique_ptr<T> make_unique_for_overwrite(size_t n) {
  105. return std::unique_ptr<T>(new std::remove_extent_t<T>[n]);
  106. }
  107. template <typename T>
  108. std::unique_ptr<T> WrapUnique(T *ptr) {
  109. return std::unique_ptr<T>(ptr);
  110. }
  111. // Range utilities
  112. // A range adaptor for a pair of iterators.
  113. //
  114. // This just wraps two iterators into a range-compatible interface. Nothing
  115. // fancy at all.
  116. template <typename IteratorT>
  117. class iterator_range {
  118. public:
  119. using iterator = IteratorT;
  120. using const_iterator = IteratorT;
  121. using value_type = typename std::iterator_traits<IteratorT>::value_type;
  122. iterator_range() : begin_iterator_(), end_iterator_() {}
  123. iterator_range(IteratorT begin_iterator, IteratorT end_iterator)
  124. : begin_iterator_(std::move(begin_iterator)),
  125. end_iterator_(std::move(end_iterator)) {}
  126. IteratorT begin() const { return begin_iterator_; }
  127. IteratorT end() const { return end_iterator_; }
  128. private:
  129. IteratorT begin_iterator_, end_iterator_;
  130. };
  131. // Convenience function for iterating over sub-ranges.
  132. //
  133. // This provides a bit of syntactic sugar to make using sub-ranges
  134. // in for loops a bit easier. Analogous to std::make_pair().
  135. template <typename T>
  136. iterator_range<T> make_range(T x, T y) {
  137. return iterator_range<T>(std::move(x), std::move(y));
  138. }
  139. // String munging.
  140. namespace internal {
  141. // Computes size of joined string.
  142. template <class S>
  143. size_t GetResultSize(const std::vector<S> &elements, size_t s_size) {
  144. const auto lambda = [](size_t partial, const S &right) {
  145. return partial + right.size();
  146. };
  147. return std::accumulate(elements.begin(), elements.end(), 0, lambda) +
  148. elements.size() * s_size - s_size;
  149. }
  150. } // namespace internal
  151. template <class S>
  152. std::string StringJoin(const std::vector<S> &elements, std::string_view delim) {
  153. std::string result;
  154. if (elements.empty()) return result;
  155. const size_t s_size = delim.size();
  156. result.reserve(internal::GetResultSize(elements, s_size));
  157. auto it = elements.begin();
  158. result.append(it->data(), it->size());
  159. for (++it; it != elements.end(); ++it) {
  160. result.append(delim.data(), s_size);
  161. result.append(it->data(), it->size());
  162. }
  163. return result;
  164. }
  165. template <class S>
  166. std::string StringJoin(const std::vector<S> &elements, char delim) {
  167. const std::string_view view_delim(&delim, 1);
  168. return StringJoin(elements, view_delim);
  169. }
  170. struct SkipEmpty {};
  171. struct ByAnyChar {
  172. public:
  173. explicit ByAnyChar(std::string_view sp) : delimiters(sp) {}
  174. std::string delimiters;
  175. };
  176. namespace internal {
  177. class StringSplitter {
  178. public:
  179. using const_iterator = std::vector<std::string_view>::const_iterator;
  180. using value_type = std::string_view;
  181. StringSplitter(std::string_view string, std::string delim,
  182. bool skip_empty = false)
  183. : string_(std::move(string)),
  184. delim_(std::move(delim)),
  185. skip_empty_(skip_empty),
  186. vec_(SplitToSv()) {}
  187. inline operator // NOLINT(google-explicit-constructor)
  188. std::vector<std::string_view>() && {
  189. return std::move(vec_);
  190. }
  191. inline operator // NOLINT(google-explicit-constructor)
  192. std::vector<std::string>() {
  193. std::vector<std::string> str_vec(vec_.begin(), vec_.end());
  194. return str_vec;
  195. }
  196. const_iterator begin() const { return vec_.begin(); }
  197. const_iterator end() const { return vec_.end(); }
  198. private:
  199. std::vector<std::string_view> SplitToSv();
  200. std::string_view string_;
  201. std::string delim_;
  202. bool skip_empty_;
  203. std::vector<std::string_view> vec_;
  204. };
  205. } // namespace internal
  206. // `StrSplit` replacements. Only support splitting on `char` or
  207. // `ByAnyChar` (notable not on a multi-char string delimiter), and with or
  208. // without `SkipEmpty`.
  209. internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim);
  210. internal::StringSplitter StrSplit(std::string_view full, char delim);
  211. internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim,
  212. SkipEmpty);
  213. internal::StringSplitter StrSplit(std::string_view full, char delim, SkipEmpty);
  214. void StripTrailingAsciiWhitespace(std::string *full);
  215. std::string_view StripTrailingAsciiWhitespace(std::string_view full);
  216. class StringOrInt {
  217. public:
  218. template <typename T, typename = std::enable_if_t<
  219. std::is_convertible_v<T, std::string_view>>>
  220. StringOrInt(T s) : str_(std::string(s)) {} // NOLINT
  221. StringOrInt(int i) { // NOLINT
  222. str_ = std::to_string(i);
  223. }
  224. const std::string &Get() const { return str_; }
  225. private:
  226. std::string str_;
  227. };
  228. // TODO(kbg): Make this work with variadic template, maybe.
  229. inline std::string StrCat(const StringOrInt &s1, const StringOrInt &s2) {
  230. return s1.Get() + s2.Get();
  231. }
  232. inline std::string StrCat(const StringOrInt &s1, const StringOrInt &s2,
  233. const StringOrInt &s3) {
  234. return s1.Get() + StrCat(s2, s3);
  235. }
  236. inline std::string StrCat(const StringOrInt &s1, const StringOrInt &s2,
  237. const StringOrInt &s3, const StringOrInt &s4) {
  238. return s1.Get() + StrCat(s2, s3, s4);
  239. }
  240. inline std::string StrCat(const StringOrInt &s1, const StringOrInt &s2,
  241. const StringOrInt &s3, const StringOrInt &s4,
  242. const StringOrInt &s5) {
  243. return s1.Get() + StrCat(s2, s3, s4, s5);
  244. }
  245. // TODO(agutkin): Remove this once we migrate to C++20, where `starts_with`
  246. // is available.
  247. inline bool StartsWith(std::string_view text, std::string_view prefix) {
  248. return prefix.empty() ||
  249. (text.size() >= prefix.size() &&
  250. memcmp(text.data(), prefix.data(), prefix.size()) == 0);
  251. }
  252. inline bool ConsumePrefix(std::string_view *s, std::string_view expected) {
  253. if (!StartsWith(*s, expected)) return false;
  254. s->remove_prefix(expected.size());
  255. return true;
  256. }
  257. } // namespace fst
  258. #endif // FST_COMPAT_H_