You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

418 lines
13 KiB

  1. // Copyright 2005-2024 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the 'License');
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an 'AS IS' BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // See www.openfst.org for extensive documentation on this weighted
  16. // finite-state transducer library.
  17. //
  18. // Class to represent and operate on sets of intervals.
  19. #ifndef FST_INTERVAL_SET_H_
  20. #define FST_INTERVAL_SET_H_
  21. #include <algorithm>
  22. #include <initializer_list>
  23. #include <iostream>
  24. #include <istream>
  25. #include <ostream>
  26. #include <vector>
  27. #include <fst/util.h>
  28. namespace fst {
  // A half-open interval [begin, end) whose endpoints are signed integers of
  // type T.
  template <class T>
  struct IntInterval {
    T begin;
    T end;

    // A default-constructed interval has both endpoints set to -1.
    IntInterval() : begin(-1), end(-1) {}

    // Builds the interval [begin, end).
    IntInterval(T begin, T end) : begin(begin), end(end) {}

    // Orders intervals by ascending begin. When the begins tie, the interval
    // with the larger end is ordered first.
    bool operator<(const IntInterval<T> &i) const {
      if (begin != i.begin) return begin < i.begin;
      return end > i.end;
    }

    // Two intervals are equal when both endpoints match.
    bool operator==(const IntInterval<T> &i) const {
      return end == i.end && begin == i.begin;
    }

    bool operator!=(const IntInterval<T> &i) const { return !(*this == i); }

    // Reads begin and then end from the stream using ReadType.
    std::istream &Read(std::istream &strm) {
      T value;
      ReadType(strm, &value);
      begin = value;
      ReadType(strm, &value);
      end = value;
      return strm;
    }

    // Writes begin and then end to the stream using WriteType.
    std::ostream &Write(std::ostream &strm) const {
      T value = begin;
      WriteType(strm, value);
      value = end;
      WriteType(strm, value);
      return strm;
    }
  };
  61. // Stores IntIntervals<T> in a vector. In addition, keeps the count of points in
  62. // all intervals.
  63. template <class T>
  64. class VectorIntervalStore {
  65. public:
  66. using Interval = IntInterval<T>;
  67. using Iterator = typename std::vector<Interval>::const_iterator;
  68. VectorIntervalStore() : count_(-1) {}
  69. VectorIntervalStore(std::initializer_list<Interval> intervals_init)
  70. : intervals_(intervals_init), count_(-1) {}
  71. std::vector<Interval> *MutableIntervals() { return &intervals_; }
  72. const Interval *Intervals() const { return intervals_.data(); }
  73. T Size() const { return intervals_.size(); }
  74. T Count() const { return count_; }
  75. void SetCount(T count) { count_ = count; }
  76. void Clear() {
  77. intervals_.clear();
  78. count_ = 0;
  79. }
  80. Iterator begin() const { return intervals_.begin(); }
  81. Iterator end() const { return intervals_.end(); }
  82. std::istream &Read(std::istream &strm) {
  83. ReadType(strm, &intervals_);
  84. return ReadType(strm, &count_);
  85. }
  86. std::ostream &Write(std::ostream &strm) const {
  87. WriteType(strm, intervals_);
  88. return WriteType(strm, count_);
  89. }
  90. private:
  91. std::vector<Interval> intervals_;
  92. T count_;
  93. };
  94. // Stores and operates on a set of half-open integral intervals [a, b)
  95. // of signed integers of type T.
  96. template <class T, class Store = VectorIntervalStore<T>>
  97. class IntervalSet {
  98. public:
  99. using Interval = IntInterval<T>;
  100. IntervalSet(std::initializer_list<Interval> intervals_init)
  101. : intervals_(intervals_init) {}
  102. template <class... A>
  103. explicit IntervalSet(A... args) : intervals_(args...) {}
  104. // Returns the interval set as a vector.
  105. std::vector<Interval> *MutableIntervals() {
  106. return intervals_.MutableIntervals();
  107. }
  108. // Returns a pointer to an array of Size() elements.
  109. const Interval *Intervals() const { return intervals_.Intervals(); }
  110. bool Empty() const { return Size() == 0; }
  111. T Size() const { return intervals_.Size(); }
  112. // Number of points in the intervals (undefined if not normalized).
  113. T Count() const { return intervals_.Count(); }
  114. void Clear() { intervals_.Clear(); }
  115. // Adds an interval set to the set. The result may not be normalized.
  116. void Union(const IntervalSet<T, Store> &iset) {
  117. intervals_.MutableIntervals()->insert(intervals_.MutableIntervals()->end(),
  118. iset.intervals_.begin(),
  119. iset.intervals_.end());
  120. }
  121. // Requires intervals be normalized.
  122. bool Member(T value) const {
  123. const Interval interval(value, value);
  124. auto lb = std::lower_bound(intervals_.begin(), intervals_.end(), interval);
  125. if (lb == intervals_.begin()) return false;
  126. return (--lb)->end > value;
  127. }
  128. // Requires intervals be normalized.
  129. bool operator==(const IntervalSet<T, Store> &iset) const {
  130. return Size() == iset.Size() &&
  131. std::equal(intervals_.begin(), intervals_.end(),
  132. iset.intervals_.begin());
  133. }
  134. // Requires intervals be normalized.
  135. bool operator!=(const IntervalSet<T, Store> &iset) const {
  136. return Size() != iset.Size() ||
  137. !std::equal(intervals_.begin(), intervals_.end(),
  138. iset.intervals_.begin());
  139. }
  140. bool Singleton() const {
  141. return Size() == 1 &&
  142. intervals_.begin()->begin + 1 == intervals_.begin()->end;
  143. }
  144. // Sorts, collapses overlapping and adjacent interals, and sets count.
  145. void Normalize();
  146. // Intersects an interval set with the set. Requires intervals be normalized.
  147. // The result is normalized.
  148. void Intersect(const IntervalSet<T, Store> &iset,
  149. IntervalSet<T, Store> *oset) const;
  150. // Complements the set w.r.t [0, maxval). Requires intervals be normalized.
  151. // The result is normalized.
  152. void Complement(T maxval, IntervalSet<T, Store> *oset) const;
  153. // Subtract an interval set from the set. Requires intervals be normalized.
  154. // The result is normalized.
  155. void Difference(const IntervalSet<T, Store> &iset,
  156. IntervalSet<T, Store> *oset) const;
  157. // Determines if an interval set overlaps with the set. Requires intervals be
  158. // normalized.
  159. bool Overlaps(const IntervalSet<T, Store> &iset) const;
  160. // Determines if an interval set overlaps with the set but neither is
  161. // contained in the other. Requires intervals be normalized.
  162. bool StrictlyOverlaps(const IntervalSet<T, Store> &iset) const;
  163. // Determines if an interval set is contained within the set. Requires
  164. // intervals be normalized.
  165. bool Contains(const IntervalSet<T, Store> &iset) const;
  166. std::istream &Read(std::istream &strm) { return intervals_.Read(strm); }
  167. std::ostream &Write(std::ostream &strm) const {
  168. return intervals_.Write(strm);
  169. }
  170. typename Store::Iterator begin() const { return intervals_.begin(); }
  171. typename Store::Iterator end() const { return intervals_.end(); }
  172. private:
  173. Store intervals_;
  174. };
  175. // Sorts, collapses overlapping and adjacent intervals, and sets count.
  176. template <typename T, class Store>
  177. void IntervalSet<T, Store>::Normalize() {
  178. auto &intervals = *intervals_.MutableIntervals();
  179. std::sort(intervals.begin(), intervals.end());
  180. T count = 0;
  181. T size = 0;
  182. for (T i = 0; i < intervals.size(); ++i) {
  183. auto &inti = intervals[i];
  184. if (inti.begin == inti.end) continue;
  185. for (T j = i + 1; j < intervals.size(); ++j) {
  186. auto &intj = intervals[j];
  187. if (intj.begin > inti.end) break;
  188. if (intj.end > inti.end) inti.end = intj.end;
  189. ++i;
  190. }
  191. count += inti.end - inti.begin;
  192. intervals[size++] = inti;
  193. }
  194. intervals.resize(size);
  195. intervals_.SetCount(count);
  196. }
  197. // Intersects an interval set with the set. Requires intervals be normalized.
  198. // The result is normalized.
  199. template <typename T, class Store>
  200. void IntervalSet<T, Store>::Intersect(const IntervalSet<T, Store> &iset,
  201. IntervalSet<T, Store> *oset) const {
  202. auto *ointervals = oset->MutableIntervals();
  203. auto it1 = intervals_.begin();
  204. auto it2 = iset.intervals_.begin();
  205. ointervals->clear();
  206. T count = 0;
  207. while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
  208. if (it1->end <= it2->begin) {
  209. ++it1;
  210. } else if (it2->end <= it1->begin) {
  211. ++it2;
  212. } else {
  213. ointervals->emplace_back(std::max(it1->begin, it2->begin),
  214. std::min(it1->end, it2->end));
  215. count += ointervals->back().end - ointervals->back().begin;
  216. if (it1->end < it2->end) {
  217. ++it1;
  218. } else {
  219. ++it2;
  220. }
  221. }
  222. }
  223. oset->intervals_.SetCount(count);
  224. }
  225. // Complements the set w.r.t [0, maxval). Requires intervals be normalized.
  226. // The result is normalized.
  227. template <typename T, class Store>
  228. void IntervalSet<T, Store>::Complement(T maxval,
  229. IntervalSet<T, Store> *oset) const {
  230. auto *ointervals = oset->MutableIntervals();
  231. ointervals->clear();
  232. T count = 0;
  233. Interval interval;
  234. interval.begin = 0;
  235. for (const auto current_interval : intervals_) {
  236. interval.end = std::min(current_interval.begin, maxval);
  237. if ((interval.begin) < (interval.end)) {
  238. ointervals->push_back(interval);
  239. count += interval.end - interval.begin;
  240. }
  241. interval.begin = current_interval.end;
  242. }
  243. interval.end = maxval;
  244. if ((interval.begin) < (interval.end)) {
  245. ointervals->push_back(interval);
  246. count += interval.end - interval.begin;
  247. }
  248. oset->intervals_.SetCount(count);
  249. }
  250. // Subtract an interval set from the set. Requires intervals be normalized.
  251. // The result is normalized.
  252. template <typename T, class Store>
  253. void IntervalSet<T, Store>::Difference(const IntervalSet<T, Store> &iset,
  254. IntervalSet<T, Store> *oset) const {
  255. if (Empty()) {
  256. oset->MutableIntervals()->clear();
  257. oset->intervals_.SetCount(0);
  258. } else {
  259. IntervalSet<T, Store> cset;
  260. iset.Complement(intervals_.Intervals()[intervals_.Size() - 1].end, &cset);
  261. Intersect(cset, oset);
  262. }
  263. }
  264. // Determines if an interval set overlaps with the set. Requires intervals be
  265. // normalized.
  266. template <typename T, class Store>
  267. bool IntervalSet<T, Store>::Overlaps(const IntervalSet<T, Store> &iset) const {
  268. auto it1 = intervals_.begin();
  269. auto it2 = iset.intervals_.begin();
  270. while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
  271. if (it1->end <= it2->begin) {
  272. ++it1;
  273. } else if (it2->end <= it1->begin) {
  274. ++it2;
  275. } else {
  276. return true;
  277. }
  278. }
  279. return false;
  280. }
  281. // Determines if an interval set overlaps with the set but neither is contained
  282. // in the other. Requires intervals be normalized.
  283. template <typename T, class Store>
  284. bool IntervalSet<T, Store>::StrictlyOverlaps(
  285. const IntervalSet<T, Store> &iset) const {
  286. auto it1 = intervals_.begin();
  287. auto it2 = iset.intervals_.begin();
  288. bool only1 = false; // Point in intervals_ but not intervals.
  289. bool only2 = false; // Point in intervals but not intervals_.
  290. bool overlap = false; // Point in both intervals_ and intervals.
  291. while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
  292. if (it1->end <= it2->begin) { // no overlap - it1 first
  293. only1 = true;
  294. ++it1;
  295. } else if (it2->end <= it1->begin) { // no overlap - it2 first
  296. only2 = true;
  297. ++it2;
  298. } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals
  299. overlap = true;
  300. ++it1;
  301. ++it2;
  302. } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2
  303. only2 = true;
  304. overlap = true;
  305. ++it1;
  306. } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1
  307. only1 = true;
  308. overlap = true;
  309. ++it2;
  310. } else { // Strict overlap.
  311. only1 = true;
  312. only2 = true;
  313. overlap = true;
  314. }
  315. if (only1 == true && only2 == true && overlap == true) return true;
  316. }
  317. if (it1 != intervals_.end()) only1 = true;
  318. if (it2 != iset.intervals_.end()) only2 = true;
  319. return only1 == true && only2 == true && overlap == true;
  320. }
  321. // Determines if an interval set is contained within the set. Requires intervals
  322. // be normalized.
  323. template <typename T, class Store>
  324. bool IntervalSet<T, Store>::Contains(const IntervalSet<T, Store> &iset) const {
  325. if (iset.Count() > Count()) return false;
  326. auto it1 = intervals_.begin();
  327. auto it2 = iset.intervals_.begin();
  328. while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
  329. if ((it1->end) <= (it2->begin)) { // No overlap; it1 first.
  330. ++it1;
  331. } else if ((it2->begin) < (it1->begin) ||
  332. (it2->end) > (it1->end)) { // No C.
  333. return false;
  334. } else if (it2->end == it1->end) {
  335. ++it1;
  336. ++it2;
  337. } else {
  338. ++it2;
  339. }
  340. }
  341. return it2 == iset.intervals_.end();
  342. }
  343. template <typename T, class Store>
  344. std::ostream &operator<<(std::ostream &strm, const IntervalSet<T, Store> &s) {
  345. strm << "{";
  346. for (T i = 0; i < s.Size(); ++i) {
  347. if (i > 0) {
  348. strm << ",";
  349. }
  350. const auto &interval = s.Intervals()[i];
  351. strm << "[" << interval.begin << "," << interval.end << ")";
  352. }
  353. strm << "}";
  354. return strm;
  355. }
  356. } // namespace fst
  357. #endif // FST_INTERVAL_SET_H_