// Copyright 2005-2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the 'License');
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an 'AS IS' BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// See www.openfst.org for extensive documentation on this weighted
// finite-state transducer library.
//
// Class to represent and operate on sets of intervals.
#ifndef FST_INTERVAL_SET_H_
#define FST_INTERVAL_SET_H_
#include <algorithm>
#include <initializer_list>
#include <iostream>
#include <istream>
#include <ostream>
#include <vector>
#include <fst/util.h>
namespace fst {
// Half-open integral interval [begin, end) of signed integers of type T.
template <class T>
struct IntInterval {
  T begin;
  T end;

  // A default-constructed interval has both endpoints set to -1.
  IntInterval() : begin(-1), end(-1) {}

  IntInterval(T begin, T end) : begin(begin), end(end) {}

  // Orders by ascending begin; among intervals sharing the same begin, the one
  // with the larger end compares as smaller (i.e., sorts first).
  bool operator<(const IntInterval<T> &i) const {
    if (begin < i.begin) return true;
    return begin == i.begin && end > i.end;
  }

  // Two intervals are equal when both endpoints match.
  bool operator==(const IntInterval<T> &i) const {
    return begin == i.begin && end == i.end;
  }

  // Two intervals differ when either endpoint differs.
  bool operator!=(const IntInterval<T> &i) const {
    return begin != i.begin || end != i.end;
  }

  // Reads begin and then end from the stream via ReadType. Each endpoint is
  // read into a temporary and then assigned, in that order.
  std::istream &Read(std::istream &strm) {
    T value;
    ReadType(strm, &value);
    begin = value;
    ReadType(strm, &value);
    end = value;
    return strm;
  }

  // Writes begin and then end to the stream via WriteType.
  std::ostream &Write(std::ostream &strm) const {
    WriteType(strm, begin);
    WriteType(strm, end);
    return strm;
  }
};
// Vector-backed storage for IntInterval<T> values. Alongside the intervals it
// keeps a count of the points covered by all intervals; the count is whatever
// was last passed to SetCount() (or set by Clear()/Read()).
template <class T>
class VectorIntervalStore {
 public:
  using Interval = IntInterval<T>;
  using Iterator = typename std::vector<Interval>::const_iterator;

  // Creates an empty store whose count is initialized to -1.
  VectorIntervalStore() : count_(-1) {}

  // Creates a store holding the given intervals; the count is initialized
  // to -1.
  VectorIntervalStore(std::initializer_list<Interval> intervals_init)
      : intervals_(intervals_init), count_(-1) {}

  // Gives mutable access to the underlying vector.
  std::vector<Interval> *MutableIntervals() { return &intervals_; }

  // Pointer to the first of Size() contiguous intervals.
  const Interval *Intervals() const { return intervals_.data(); }

  // Number of stored intervals, converted to T.
  T Size() const { return static_cast<T>(intervals_.size()); }

  // The recorded point count.
  T Count() const { return count_; }

  void SetCount(T count) { count_ = count; }

  // Removes all intervals and resets the count to 0.
  void Clear() {
    count_ = 0;
    intervals_.clear();
  }

  Iterator begin() const { return intervals_.cbegin(); }

  Iterator end() const { return intervals_.cend(); }

  // Reads the interval vector and then the count via ReadType.
  std::istream &Read(std::istream &strm) {
    ReadType(strm, &intervals_);
    return ReadType(strm, &count_);
  }

  // Writes the interval vector and then the count via WriteType.
  std::ostream &Write(std::ostream &strm) const {
    WriteType(strm, intervals_);
    return WriteType(strm, count_);
  }

 private:
  std::vector<Interval> intervals_;
  T count_;
};
// Stores and operates on a set of half-open integral intervals [a, b)
// of signed integers of type T. Most operations require the set to be
// normalized (see Normalize()).
template <class T, class Store = VectorIntervalStore<T>>
class IntervalSet {
 public:
  using Interval = IntInterval<T>;

  // Builds the set from a list of intervals; the list is forwarded to the
  // store unchanged (it is not normalized here).
  IntervalSet(std::initializer_list<Interval> intervals_init)
      : intervals_(intervals_init) {}

  // Forwards arbitrary constructor arguments to the underlying store.
  template <class... A>
  explicit IntervalSet(A... args) : intervals_(args...) {}

  // Returns the interval set as a vector.
  std::vector<Interval> *MutableIntervals() {
    return intervals_.MutableIntervals();
  }

  // Returns a pointer to an array of Size() elements.
  const Interval *Intervals() const { return intervals_.Intervals(); }

  bool Empty() const { return Size() == 0; }

  // Number of stored intervals.
  T Size() const { return intervals_.Size(); }

  // Number of points in the intervals (undefined if not normalized).
  T Count() const { return intervals_.Count(); }

  void Clear() { intervals_.Clear(); }

  // Adds an interval set to the set. The result may not be normalized, and
  // the stored count is not updated here.
  void Union(const IntervalSet<T, Store> &iset) {
    // A set united with itself is unchanged. Bail out early: inserting a
    // vector's own range into itself has undefined behavior.
    if (&iset == this) return;
    auto *intervals = intervals_.MutableIntervals();
    intervals->insert(intervals->end(), iset.intervals_.begin(),
                      iset.intervals_.end());
  }

  // Returns true iff value lies inside one of the intervals.
  // Requires intervals be normalized.
  bool Member(T value) const {
    // Under IntInterval's ordering, every interval that starts at or before
    // value (and is non-empty if it starts exactly at value) sorts before the
    // probe [value, value). The candidate is therefore the element just
    // before the lower bound.
    const Interval interval(value, value);
    auto lb = std::lower_bound(intervals_.begin(), intervals_.end(), interval);
    if (lb == intervals_.begin()) return false;
    return (--lb)->end > value;
  }

  // Requires intervals be normalized.
  bool operator==(const IntervalSet<T, Store> &iset) const {
    return Size() == iset.Size() &&
           std::equal(intervals_.begin(), intervals_.end(),
                      iset.intervals_.begin());
  }

  // Requires intervals be normalized.
  bool operator!=(const IntervalSet<T, Store> &iset) const {
    return Size() != iset.Size() ||
           !std::equal(intervals_.begin(), intervals_.end(),
                       iset.intervals_.begin());
  }

  // True iff the set holds exactly one interval and it covers one point.
  bool Singleton() const {
    return Size() == 1 &&
           intervals_.begin()->begin + 1 == intervals_.begin()->end;
  }

  // Sorts, collapses overlapping and adjacent intervals, and sets count.
  void Normalize();

  // Intersects an interval set with the set. Requires intervals be normalized.
  // The result is normalized.
  void Intersect(const IntervalSet<T, Store> &iset,
                 IntervalSet<T, Store> *oset) const;

  // Complements the set w.r.t [0, maxval). Requires intervals be normalized.
  // The result is normalized.
  void Complement(T maxval, IntervalSet<T, Store> *oset) const;

  // Subtract an interval set from the set. Requires intervals be normalized.
  // The result is normalized.
  void Difference(const IntervalSet<T, Store> &iset,
                  IntervalSet<T, Store> *oset) const;

  // Determines if an interval set overlaps with the set. Requires intervals be
  // normalized.
  bool Overlaps(const IntervalSet<T, Store> &iset) const;

  // Determines if an interval set overlaps with the set but neither is
  // contained in the other. Requires intervals be normalized.
  bool StrictlyOverlaps(const IntervalSet<T, Store> &iset) const;

  // Determines if an interval set is contained within the set. Requires
  // intervals be normalized.
  bool Contains(const IntervalSet<T, Store> &iset) const;

  // Serialization is delegated to the store.
  std::istream &Read(std::istream &strm) { return intervals_.Read(strm); }

  std::ostream &Write(std::ostream &strm) const {
    return intervals_.Write(strm);
  }

  // Iteration over the stored intervals, in storage order.
  typename Store::Iterator begin() const { return intervals_.begin(); }

  typename Store::Iterator end() const { return intervals_.end(); }

 private:
  Store intervals_;
};
// Sorts, collapses overlapping and adjacent intervals, and sets count.
//
// Intervals whose begin equals their end are dropped. The surviving merged
// intervals are compacted to the front of the vector, which is then truncated.
// Positions are tracked with container iterators rather than values of type T,
// so the traversal does not mix signed and unsigned comparisons and cannot
// overflow when T is narrower than the container's size type.
template <typename T, class Store>
void IntervalSet<T, Store>::Normalize() {
  auto &intervals = *intervals_.MutableIntervals();
  std::sort(intervals.begin(), intervals.end());
  T count = 0;
  auto out = intervals.begin();  // Next slot for a merged interval.
  auto cur = intervals.begin();
  const auto last = intervals.end();
  while (cur != last) {
    if (cur->begin == cur->end) {  // Empty interval: skip it.
      ++cur;
      continue;
    }
    auto merged = *cur;
    ++cur;
    // Absorb every following interval that starts at or before the current
    // end (overlapping or adjacent), extending the end as needed.
    for (; cur != last && cur->begin <= merged.end; ++cur) {
      if (cur->end > merged.end) merged.end = cur->end;
    }
    count += merged.end - merged.begin;
    // out always trails cur, so this never overwrites an unread interval.
    *out = merged;
    ++out;
  }
  intervals.erase(out, intervals.end());
  intervals_.SetCount(count);
}
// Intersects an interval set with the set. Requires intervals be normalized.
// The result is normalized.
//
// Walks both sorted interval lists in lockstep, writing each overlapping
// piece to *oset and recording the total number of points as its count.
// Note that *oset is cleared before the inputs are read.
template <typename T, class Store>
void IntervalSet<T, Store>::Intersect(const IntervalSet<T, Store> &iset,
                                      IntervalSet<T, Store> *oset) const {
  auto *result = oset->MutableIntervals();
  auto lhs = intervals_.begin();
  auto rhs = iset.intervals_.begin();
  result->clear();
  T total = 0;
  while (lhs != intervals_.end() && rhs != iset.intervals_.end()) {
    if (lhs->end <= rhs->begin) {
      // lhs lies entirely before rhs.
      ++lhs;
      continue;
    }
    if (rhs->end <= lhs->begin) {
      // rhs lies entirely before lhs.
      ++rhs;
      continue;
    }
    // The two intervals overlap; emit the common part.
    const T lo = std::max(lhs->begin, rhs->begin);
    const T hi = std::min(lhs->end, rhs->end);
    result->emplace_back(lo, hi);
    total += hi - lo;
    // Advance whichever interval ends first (rhs on ties).
    if (lhs->end < rhs->end) {
      ++lhs;
    } else {
      ++rhs;
    }
  }
  oset->intervals_.SetCount(total);
}
// Complements the set w.r.t [0, maxval). Requires intervals be normalized.
// The result is normalized.
//
// Emits the gaps between consecutive intervals, starting at 0 and clipped to
// maxval, into *oset and records their total length as its count.
template <typename T, class Store>
void IntervalSet<T, Store>::Complement(T maxval,
                                       IntervalSet<T, Store> *oset) const {
  auto *gaps = oset->MutableIntervals();
  gaps->clear();
  T total = 0;
  // Appends the candidate gap to the output only when it is non-empty.
  const auto emit_if_nonempty = [&](const Interval &candidate) {
    if (candidate.begin < candidate.end) {
      gaps->push_back(candidate);
      total += candidate.end - candidate.begin;
    }
  };
  Interval gap;
  gap.begin = 0;
  for (const auto &covered : intervals_) {
    // The gap runs up to the start of the next covered interval.
    gap.end = std::min(covered.begin, maxval);
    emit_if_nonempty(gap);
    gap.begin = covered.end;
  }
  // Trailing gap between the last covered interval and maxval.
  gap.end = maxval;
  emit_if_nonempty(gap);
  oset->intervals_.SetCount(total);
}
// Subtract an interval set from the set. Requires intervals be normalized.
// The result is normalized.
//
// Sweeps both sorted interval lists once, keeping the parts of each interval
// of this set that are not covered by iset. Working directly on the intervals
// (instead of intersecting with a complement taken over [0, max)) keeps
// points below zero and avoids building a temporary set. The result is
// assembled in a local vector and swapped into *oset at the end, so oset may
// be the same object as this set or as iset.
template <typename T, class Store>
void IntervalSet<T, Store>::Difference(const IntervalSet<T, Store> &iset,
                                       IntervalSet<T, Store> *oset) const {
  std::vector<Interval> remaining;
  T count = 0;
  auto sub = iset.intervals_.begin();
  const auto sub_end = iset.intervals_.end();
  for (auto cur = intervals_.begin(); cur != intervals_.end(); ++cur) {
    T lo = cur->begin;  // Start of the not-yet-classified part of *cur.
    // Subtracted intervals ending at or before lo cannot affect *cur or any
    // later interval of this set.
    while (sub != sub_end && sub->end <= lo) ++sub;
    while (sub != sub_end && sub->begin < cur->end) {
      if (lo < sub->begin) {
        // The stretch before the subtracted interval survives.
        remaining.emplace_back(lo, sub->begin);
        count += sub->begin - lo;
      }
      if (sub->end >= cur->end) {
        // *sub covers the rest of *cur. Do not advance sub: it may also cut
        // into the next interval of this set.
        lo = cur->end;
        break;
      }
      lo = sub->end;
      ++sub;
    }
    if (lo < cur->end) {
      // Tail of *cur after the last subtracted interval.
      remaining.emplace_back(lo, cur->end);
      count += cur->end - lo;
    }
  }
  oset->MutableIntervals()->swap(remaining);
  oset->intervals_.SetCount(count);
}
// Determines if an interval set overlaps with the set. Requires intervals be
// normalized.
//
// Returns true as soon as an interval of this set and an interval of iset
// share at least one point.
template <typename T, class Store>
bool IntervalSet<T, Store>::Overlaps(const IntervalSet<T, Store> &iset) const {
  auto lhs = intervals_.begin();
  auto rhs = iset.intervals_.begin();
  const auto lhs_end = intervals_.end();
  const auto rhs_end = iset.intervals_.end();
  while (lhs != lhs_end && rhs != rhs_end) {
    const bool lhs_before_rhs = lhs->end <= rhs->begin;
    if (lhs_before_rhs) {
      ++lhs;
      continue;
    }
    const bool rhs_before_lhs = rhs->end <= lhs->begin;
    if (rhs_before_lhs) {
      ++rhs;
      continue;
    }
    // Neither interval lies entirely before the other.
    return true;
  }
  return false;
}
// Determines if an interval set overlaps with the set but neither is contained
// in the other. Requires intervals be normalized.
//
// Tracks three facts while sweeping both interval lists: whether some point is
// only in this set, whether some point is only in iset, and whether some point
// is in both. The answer is true exactly when all three hold.
template <typename T, class Store>
bool IntervalSet<T, Store>::StrictlyOverlaps(
    const IntervalSet<T, Store> &iset) const {
  auto lhs = intervals_.begin();
  auto rhs = iset.intervals_.begin();
  const auto lhs_end = intervals_.end();
  const auto rhs_end = iset.intervals_.end();
  bool lhs_only = false;  // Some point is in this set but not in iset.
  bool rhs_only = false;  // Some point is in iset but not in this set.
  bool shared = false;    // Some point is in both sets.
  while (lhs != lhs_end && rhs != rhs_end) {
    if (lhs->end <= rhs->begin) {
      // Disjoint, with the interval of this set first.
      lhs_only = true;
      ++lhs;
    } else if (rhs->end <= lhs->begin) {
      // Disjoint, with the interval of iset first.
      rhs_only = true;
      ++rhs;
    } else if (rhs->begin == lhs->begin && rhs->end == lhs->end) {
      // Identical intervals.
      shared = true;
      ++lhs;
      ++rhs;
    } else if (rhs->begin <= lhs->begin && rhs->end >= lhs->end) {
      // The interval of this set is contained in the interval of iset.
      rhs_only = true;
      shared = true;
      ++lhs;
    } else if (lhs->begin <= rhs->begin && lhs->end >= rhs->end) {
      // The interval of iset is contained in the interval of this set.
      lhs_only = true;
      shared = true;
      ++rhs;
    } else {
      // Partial overlap: each interval sticks out past the other. All three
      // flags are set, so the check below returns immediately.
      lhs_only = true;
      rhs_only = true;
      shared = true;
    }
    if (lhs_only && rhs_only && shared) return true;
  }
  // Whatever is left over in either list belongs to that set alone.
  if (lhs != lhs_end) lhs_only = true;
  if (rhs != rhs_end) rhs_only = true;
  return lhs_only && rhs_only && shared;
}
// Determines if an interval set is contained within the set. Requires intervals
// be normalized.
//
// A quick rejection compares the point counts first; otherwise every interval
// of iset must fit inside some interval of this set.
template <typename T, class Store>
bool IntervalSet<T, Store>::Contains(const IntervalSet<T, Store> &iset) const {
  if (iset.Count() > Count()) return false;
  auto outer = intervals_.begin();
  auto inner = iset.intervals_.begin();
  const auto outer_end = intervals_.end();
  const auto inner_end = iset.intervals_.end();
  while (outer != outer_end && inner != inner_end) {
    if (outer->end <= inner->begin) {
      // The interval of this set ends before the one of iset starts.
      ++outer;
      continue;
    }
    const bool fits =
        inner->begin >= outer->begin && inner->end <= outer->end;
    if (!fits) return false;  // Part of the iset interval is not covered.
    // When both end at the same point the covering interval is used up too.
    if (inner->end == outer->end) ++outer;
    ++inner;
  }
  // Contained only if every interval of iset was matched.
  return inner == inner_end;
}
// Prints the set as "{[b1,e1),[b2,e2),...}" in storage order; an empty set
// prints as "{}".
template <typename T, class Store>
std::ostream &operator<<(std::ostream &strm, const IntervalSet<T, Store> &s) {
  strm << "{";
  bool first = true;
  for (T index = 0; index < s.Size(); ++index) {
    // Separate consecutive intervals with a comma.
    if (!first) {
      strm << ",";
    }
    first = false;
    const auto &current = s.Intervals()[index];
    strm << "[" << current.begin << "," << current.end << ")";
  }
  strm << "}";
  return strm;
}
} // namespace fst
#endif // FST_INTERVAL_SET_H_