|
|
// Copyright 2005-2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the 'License');
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an 'AS IS' BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// See www.openfst.org for extensive documentation on this weighted
// finite-state transducer library.
//
// Classes to allow matching labels leaving FST states.
#ifndef FST_MATCHER_H_
#define FST_MATCHER_H_
#include <sys/types.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <optional>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <fst/log.h>
#include <fst/fst.h>
#include <fst/mutable-fst.h> // for all internal FST accessors.
#include <fst/properties.h>
#include <fst/util.h>
#include <unordered_map>
#include <optional>
namespace fst {
// Matchers find and iterate through requested labels at FST states. In the
// simplest form, these are just some associative map or search keyed on labels.
// More generally, they may implement matching special labels that represent
// sets of labels such as sigma (all), rho (rest), or phi (fail). The Matcher
// interface is:
//
// template <class F>
// class Matcher {
// public:
// using FST = F;
// using Arc = typename FST::Arc;
// using Label = typename Arc::Label;
// using StateId = typename Arc::StateId;
// using Weight = typename Arc::Weight;
//
// // Required constructors. Note:
// // -- the constructors that copy the FST arg are useful for
// // letting the matcher manage the FST through copies
// // (esp with 'safe' copies); e.g. ComposeFst depends on this.
// // -- the constructor that does not copy is useful when the
// // the FST is mutated during the lifetime of the matcher
// // (o.w. the matcher would have its own unmutated deep copy).
//
// // This makes a copy of the FST.
// Matcher(const FST &fst, MatchType type);
// // This doesn't copy the FST.
// Matcher(const FST *fst, MatchType type);
// // This makes a copy of the FST.
// // See Copy() below.
// Matcher(const Matcher &matcher, bool safe = false);
//
// // If safe = true, the copy is thread-safe. See Fst<>::Copy() for
// // further doc.
// Matcher *Copy(bool safe = false) const override;
//
// // Returns the match type that can be provided (depending on compatibility
// // of the input FST). It is either the requested match type, MATCH_NONE,
// // or MATCH_UNKNOWN. If test is false, a costly testing is avoided, but
// // MATCH_UNKNOWN may be returned. If test is true, a definite answer is
// // returned, but may involve more costly computation (e.g., visiting
// // the FST).
// // MatchType Type(bool test) const override;
//
// // Specifies the current state.
// void SetState(StateId s) final;
//
// // Finds matches to a label at the current state, returning true if a match
// // found. kNoLabel matches any non-consuming transitions, e.g., epsilon
// // transitions, which do not require a matching symbol.
// bool Find(Label label) final;
//
// // Iterator methods. Note that initially and after SetState() these have
// // undefined behavior until Find() is called.
//
// bool Done() const final;
//
// const Arc &Value() const final;
//
// void Next() final;
//
// // Returns final weight of a state.
// Weight Final(StateId) const final;
//
// // Indicates preference for being the side used for matching in
// // composition. If the value is kRequirePriority, then it is
// // mandatory that it be used. Calling this method without passing the
// // current state of the matcher invalidates the state of the matcher.
// ssize_t Priority(StateId s) final;
//
// // This specifies the known FST properties as viewed from this matcher. It
// // takes as argument the input FST's known properties.
// uint64_t Properties(uint64_t props) const override;
//
// // Returns matcher flags.
// uint32_t Flags() const override;
//
// // Returns matcher FST.
// const FST &GetFst() const override;
// };
// Basic matcher flags.
// Matcher needs to be used as the matching side in composition for
// at least one state (has kRequirePriority).
inline constexpr uint32_t kRequireMatch = 0x00000001;
// Flags used for basic matchers (see also lookahead.h).
inline constexpr uint32_t kMatcherFlags = kRequireMatch;
// Matcher priority that is mandatory.
inline constexpr ssize_t kRequirePriority = -1;
// Matcher interface, templated on the Arc definition; used for matcher
// specializations that are returned by the InitMatcher FST method.
template <class A> class MatcherBase { public: using Arc = A; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight;
virtual ~MatcherBase() = default;
// Virtual interface.
virtual MatcherBase *Copy(bool safe = false) const = 0; virtual MatchType Type(bool) const = 0; virtual void SetState(StateId) = 0; virtual bool Find(Label) = 0; virtual bool Done() const = 0; virtual const Arc &Value() const = 0; virtual void Next() = 0; virtual const Fst<Arc> &GetFst() const = 0; virtual uint64_t Properties(uint64_t) const = 0;
// Trivial implementations that can be used by derived classes. Full
// devirtualization is expected for any derived class marked final.
virtual uint32_t Flags() const { return 0; }
virtual Weight Final(StateId s) const { return internal::Final(GetFst(), s); }
virtual ssize_t Priority(StateId s) { return internal::NumArcs(GetFst(), s); } };
// A matcher that expects sorted labels on the side to be matched.
// If match_type == MATCH_INPUT, epsilons match the implicit self-loop
// Arc(kNoLabel, 0, Weight::One(), current_state) as well as any
// actual epsilon transitions. If match_type == MATCH_OUTPUT, then
// Arc(0, kNoLabel, Weight::One(), current_state) is instead matched.
template <class F> class SortedMatcher : public MatcherBase<typename F::Arc> { public: using FST = F; using Arc = typename FST::Arc; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight;
using MatcherBase<Arc>::Flags; using MatcherBase<Arc>::Properties;
// Labels >= binary_label will be searched for by binary search;
// o.w. linear search is used.
// This makes a copy of the FST.
SortedMatcher(const FST &fst, MatchType match_type, Label binary_label = 1) : SortedMatcher(fst.Copy(), match_type, binary_label) { owned_fst_.reset(&fst_); }
// Labels >= binary_label will be searched for by binary search;
// o.w. linear search is used.
// This doesn't copy the FST.
SortedMatcher(const FST *fst, MatchType match_type, Label binary_label = 1) : fst_(*fst), state_(kNoStateId), aiter_(std::nullopt), match_type_(match_type), binary_label_(binary_label), match_label_(kNoLabel), narcs_(0), loop_(kNoLabel, 0, Weight::One(), kNoStateId), error_(false) { switch (match_type_) { case MATCH_INPUT: case MATCH_NONE: break; case MATCH_OUTPUT: std::swap(loop_.ilabel, loop_.olabel); break; default: FSTERROR() << "SortedMatcher: Bad match type"; match_type_ = MATCH_NONE; error_ = true; } }
// This makes a copy of the FST.
SortedMatcher(const SortedMatcher &matcher, bool safe = false) : owned_fst_(matcher.fst_.Copy(safe)), fst_(*owned_fst_), state_(kNoStateId), aiter_(std::nullopt), match_type_(matcher.match_type_), binary_label_(matcher.binary_label_), match_label_(kNoLabel), narcs_(0), loop_(matcher.loop_), error_(matcher.error_) {}
~SortedMatcher() override = default;
SortedMatcher *Copy(bool safe = false) const override { return new SortedMatcher(*this, safe); }
MatchType Type(bool test) const override { if (match_type_ == MATCH_NONE) return match_type_; const auto true_prop = match_type_ == MATCH_INPUT ? kILabelSorted : kOLabelSorted; const auto false_prop = match_type_ == MATCH_INPUT ? kNotILabelSorted : kNotOLabelSorted; const auto props = fst_.Properties(true_prop | false_prop, test); if (props & true_prop) { return match_type_; } else if (props & false_prop) { return MATCH_NONE; } else { return MATCH_UNKNOWN; } }
void SetState(StateId s) final { if (state_ == s) return; state_ = s; if (match_type_ == MATCH_NONE) { FSTERROR() << "SortedMatcher: Bad match type"; error_ = true; } aiter_.emplace(fst_, s); aiter_->SetFlags(kArcNoCache, kArcNoCache); narcs_ = internal::NumArcs(fst_, s); loop_.nextstate = s; }
bool Find(Label match_label) final { exact_match_ = true; if (error_) { current_loop_ = false; match_label_ = kNoLabel; return false; } current_loop_ = match_label == 0; match_label_ = match_label == kNoLabel ? 0 : match_label; if (Search()) { return true; } else { return current_loop_; } }
// Positions matcher to the first position where inserting match_label would
// maintain the sort order.
void LowerBound(Label label) { exact_match_ = false; current_loop_ = false; if (error_) { match_label_ = kNoLabel; return; } match_label_ = label; Search(); }
// After Find(), returns false if no more exact matches.
// After LowerBound(), returns false if no more arcs.
bool Done() const final { if (current_loop_) return false; if (aiter_->Done()) return true; if (!exact_match_) return false; aiter_->SetFlags( match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue, kArcValueFlags); return GetLabel() != match_label_; }
const Arc &Value() const final { if (current_loop_) return loop_; aiter_->SetFlags(kArcValueFlags, kArcValueFlags); return aiter_->Value(); }
void Next() final { if (current_loop_) { current_loop_ = false; } else { aiter_->Next(); } }
Weight Final(StateId s) const final { return MatcherBase<Arc>::Final(s); }
ssize_t Priority(StateId s) final { return MatcherBase<Arc>::Priority(s); }
const FST &GetFst() const override { return fst_; }
uint64_t Properties(uint64_t inprops) const override { return inprops | (error_ ? kError : 0); }
size_t Position() const { return aiter_ ? aiter_->Position() : 0; }
private: Label GetLabel() const { const auto &arc = aiter_->Value(); return match_type_ == MATCH_INPUT ? arc.ilabel : arc.olabel; }
bool BinarySearch(); bool LinearSearch(); bool Search();
std::unique_ptr<const FST> owned_fst_; // FST ptr if owned.
const FST &fst_; // FST for matching.
StateId state_; // Matcher state.
mutable std::optional<ArcIterator<FST>> aiter_; // Iterator for current state.
MatchType match_type_; // Type of match to perform.
Label binary_label_; // Least label for binary search.
Label match_label_; // Current label to be matched.
size_t narcs_; // Current state arc count.
Arc loop_; // For non-consuming symbols.
bool current_loop_; // Current arc is the implicit loop.
bool exact_match_; // Exact match or lower bound?
bool error_; // Error encountered?
};
// Returns true iff match to match_label_. The arc iterator is positioned at the
// lower bound, that is, the first element greater than or equal to
// match_label_, or the end if all elements are less than match_label_.
// If multiple elements are equal to the `match_label_`, returns the rightmost
// one.
template <class FST> inline bool SortedMatcher<FST>::BinarySearch() { size_t size = narcs_; if (size == 0) { return false; } size_t high = size - 1; while (size > 1) { const size_t half = size / 2; const size_t mid = high - half; aiter_->Seek(mid); if (GetLabel() >= match_label_) { high = mid; } size -= half; } aiter_->Seek(high); const auto label = GetLabel(); if (label == match_label_) { return true; } if (label < match_label_) { aiter_->Next(); } return false; }
// Returns true iff match to match_label_, positioning arc iterator at lower
// bound.
template <class FST> inline bool SortedMatcher<FST>::LinearSearch() { for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) { const auto label = GetLabel(); if (label == match_label_) return true; if (label > match_label_) break; } return false; }
// Returns true iff match to match_label_, positioning arc iterator at lower
// bound.
template <class FST> inline bool SortedMatcher<FST>::Search() { aiter_->SetFlags( match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue, kArcValueFlags); if (match_label_ >= binary_label_) { return BinarySearch(); } else { return LinearSearch(); } }
// A matcher that stores labels in a per-state hash table populated upon the
// first visit to that state. Sorting is not required. Treatment of
// epsilons are the same as with SortedMatcher.
template <class F> class HashMatcher : public MatcherBase<typename F::Arc> { public: using FST = F; using Arc = typename FST::Arc; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight;
using MatcherBase<Arc>::Flags; using MatcherBase<Arc>::Final; using MatcherBase<Arc>::Priority;
// This makes a copy of the FST.
HashMatcher(const FST &fst, MatchType match_type) : HashMatcher(fst.Copy(), match_type) { owned_fst_.reset(&fst_); }
// This doesn't copy the FST.
HashMatcher(const FST *fst, MatchType match_type) : fst_(*fst), state_(kNoStateId), match_type_(match_type), loop_(kNoLabel, 0, Weight::One(), kNoStateId), error_(false), state_table_(std::make_shared<StateTable>()) { switch (match_type_) { case MATCH_INPUT: case MATCH_NONE: break; case MATCH_OUTPUT: std::swap(loop_.ilabel, loop_.olabel); break; default: FSTERROR() << "HashMatcher: Bad match type"; match_type_ = MATCH_NONE; error_ = true; } }
// This makes a copy of the FST.
HashMatcher(const HashMatcher &matcher, bool safe = false) : owned_fst_(matcher.fst_.Copy(safe)), fst_(*owned_fst_), state_(kNoStateId), match_type_(matcher.match_type_), loop_(matcher.loop_), error_(matcher.error_), state_table_(safe ? std::make_shared<StateTable>() : matcher.state_table_) {}
HashMatcher *Copy(bool safe = false) const override { return new HashMatcher(*this, safe); }
// The argument is ignored as there are no relevant properties to test.
MatchType Type(bool test) const override { return match_type_; }
void SetState(StateId s) final;
bool Find(Label label) final { current_loop_ = label == 0; if (label == 0) { Search(label); return true; } if (label == kNoLabel) label = 0; return Search(label); }
bool Done() const final { if (current_loop_) return false; return label_it_ == label_end_; }
const Arc &Value() const final { if (current_loop_) return loop_; aiter_->Seek(label_it_->second); return aiter_->Value(); }
void Next() final { if (current_loop_) { current_loop_ = false; } else { ++label_it_; } }
const FST &GetFst() const override { return fst_; }
uint64_t Properties(uint64_t inprops) const override { return inprops | (error_ ? kError : 0); }
private: Label GetLabel() const { const auto &arc = aiter_->Value(); return match_type_ == MATCH_INPUT ? arc.ilabel : arc.olabel; }
bool Search(Label match_label);
using LabelTable = std::unordered_multimap<Label, size_t>; using StateTable = std::unordered_map<StateId, std::unique_ptr<LabelTable>>;
std::unique_ptr<const FST> owned_fst_; // ptr to FST if owned.
const FST &fst_; // FST for matching.
StateId state_; // Matcher state.
MatchType match_type_; Arc loop_; // The implicit loop itself.
bool current_loop_; // Is the current arc the implicit loop?
bool error_; // Error encountered?
std::unique_ptr<ArcIterator<FST>> aiter_; std::shared_ptr<StateTable> state_table_; // Table from state to label table.
LabelTable *label_table_; // Pointer to current state's label table.
typename LabelTable::iterator label_it_; // Position for label.
typename LabelTable::iterator label_end_; // Position for last label + 1.
};
template <class FST> void HashMatcher<FST>::SetState(typename FST::Arc::StateId s) { if (state_ == s) return; // Resets everything for the state.
state_ = s; loop_.nextstate = state_; aiter_ = std::make_unique<ArcIterator<FST>>(fst_, state_); if (match_type_ == MATCH_NONE) { FSTERROR() << "HashMatcher: Bad match type"; error_ = true; } // Attempts to insert a new label table.
const auto &[it, success] = state_table_->emplace(state_, std::make_unique<LabelTable>()); // Sets instance's pointer to the label table for this state.
label_table_ = it->second.get(); // If it already exists, no additional work is done and we simply return.
if (!success) return; // Otherwise, populate this new table.
// Populates the label table.
label_table_->reserve(internal::NumArcs(fst_, state_)); const auto aiter_flags = (match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue) | kArcNoCache; aiter_->SetFlags(aiter_flags, kArcFlags); for (; !aiter_->Done(); aiter_->Next()) { label_table_->emplace(GetLabel(), aiter_->Position()); } aiter_->SetFlags(kArcValueFlags, kArcValueFlags); }
template <class FST> inline bool HashMatcher<FST>::Search(typename FST::Arc::Label match_label) { std::tie(label_it_, label_end_) = label_table_->equal_range(match_label); if (label_it_ == label_end_) return false; aiter_->Seek(label_it_->second); return true; }
// Specifies whether we rewrite both the input and output sides during matching.
enum MatcherRewriteMode { MATCHER_REWRITE_AUTO = 0, // Rewrites both sides iff acceptor.
MATCHER_REWRITE_ALWAYS, MATCHER_REWRITE_NEVER };
// For any requested label that doesn't match at a state, this matcher
// considers the *unique* transition that matches the label 'phi_label'
// (phi = 'fail'), and recursively looks for a match at its
// destination. When 'phi_loop' is true, if no match is found but a
// phi self-loop is found, then the phi transition found is returned
// with the phi_label rewritten as the requested label (both sides if
// an acceptor, or if 'rewrite_both' is true and both input and output
// labels of the found transition are 'phi_label'). If 'phi_label' is
// kNoLabel, this special matching is not done. PhiMatcher is
// templated itself on a matcher, which is used to perform the
// underlying matching. By default, the underlying matcher is
// constructed by PhiMatcher. The user can instead pass in this
// object; in that case, PhiMatcher takes its ownership.
// Phi non-determinism not supported. No non-consuming symbols other
// than epsilon supported with the underlying template argument matcher.
template <class M> class PhiMatcher : public MatcherBase<typename M::Arc> { public: using FST = typename M::FST; using Arc = typename FST::Arc; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight;
// This makes a copy of the FST (w/o 'matcher' arg).
PhiMatcher(const FST &fst, MatchType match_type, Label phi_label = kNoLabel, bool phi_loop = true, MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, M *matcher = nullptr) : matcher_(matcher ? matcher : new M(fst, match_type)), match_type_(match_type), phi_label_(phi_label), state_(kNoStateId), phi_loop_(phi_loop), error_(false) { if (match_type == MATCH_BOTH) { FSTERROR() << "PhiMatcher: Bad match type"; match_type_ = MATCH_NONE; error_ = true; } if (rewrite_mode == MATCHER_REWRITE_AUTO) { rewrite_both_ = fst.Properties(kAcceptor, true); } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) { rewrite_both_ = true; } else { rewrite_both_ = false; } }
// This doesn't copy the FST.
PhiMatcher(const FST *fst, MatchType match_type, Label phi_label = kNoLabel, bool phi_loop = true, MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, M *matcher = nullptr) : PhiMatcher(*fst, match_type, phi_label, phi_loop, rewrite_mode, matcher ? matcher : new M(fst, match_type)) {}
// This makes a copy of the FST.
PhiMatcher(const PhiMatcher &matcher, bool safe = false) : matcher_(new M(*matcher.matcher_, safe)), match_type_(matcher.match_type_), phi_label_(matcher.phi_label_), rewrite_both_(matcher.rewrite_both_), state_(kNoStateId), phi_loop_(matcher.phi_loop_), error_(matcher.error_) {}
PhiMatcher *Copy(bool safe = false) const override { return new PhiMatcher(*this, safe); }
MatchType Type(bool test) const override { return matcher_->Type(test); }
void SetState(StateId s) final { if (state_ == s) return; matcher_->SetState(s); state_ = s; has_phi_ = phi_label_ != kNoLabel; }
bool Find(Label match_label) final;
bool Done() const final { return matcher_->Done(); }
const Arc &Value() const final { if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) { return matcher_->Value(); } else if (phi_match_ == 0) { // Virtual epsilon loop.
phi_arc_ = Arc(kNoLabel, 0, Weight::One(), state_); if (match_type_ == MATCH_OUTPUT) { std::swap(phi_arc_.ilabel, phi_arc_.olabel); } return phi_arc_; } else { phi_arc_ = matcher_->Value(); phi_arc_.weight = Times(phi_weight_, phi_arc_.weight); if (phi_match_ != kNoLabel) { // Phi loop match.
if (rewrite_both_) { if (phi_arc_.ilabel == phi_label_) phi_arc_.ilabel = phi_match_; if (phi_arc_.olabel == phi_label_) phi_arc_.olabel = phi_match_; } else if (match_type_ == MATCH_INPUT) { phi_arc_.ilabel = phi_match_; } else { phi_arc_.olabel = phi_match_; } } return phi_arc_; } }
void Next() final { matcher_->Next(); }
Weight Final(StateId s) const final { auto weight = matcher_->Final(s); if (phi_label_ == kNoLabel || weight != Weight::Zero()) { return weight; } weight = Weight::One(); matcher_->SetState(s); while (matcher_->Final(s) == Weight::Zero()) { if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) break; weight = Times(weight, matcher_->Value().weight); if (s == matcher_->Value().nextstate) { return Weight::Zero(); // Does not follow phi self-loops.
} s = matcher_->Value().nextstate; matcher_->SetState(s); } weight = Times(weight, matcher_->Final(s)); return weight; }
ssize_t Priority(StateId s) final { if (phi_label_ != kNoLabel) { matcher_->SetState(s); const bool has_phi = matcher_->Find(phi_label_ == 0 ? -1 : phi_label_); return has_phi ? kRequirePriority : matcher_->Priority(s); } else { return matcher_->Priority(s); } }
const FST &GetFst() const override { return matcher_->GetFst(); }
uint64_t Properties(uint64_t props) const override;
uint32_t Flags() const override { if (phi_label_ == kNoLabel || match_type_ == MATCH_NONE) { return matcher_->Flags(); } return matcher_->Flags() | kRequireMatch; }
Label PhiLabel() const { return phi_label_; }
private: mutable std::unique_ptr<M> matcher_; MatchType match_type_; // Type of match requested.
Label phi_label_; // Label that represents the phi transition.
bool rewrite_both_; // Rewrite both sides when both are phi_label_?
bool has_phi_; // Are there possibly phis at the current state?
Label phi_match_; // Current label that matches phi loop.
mutable Arc phi_arc_; // Arc to return.
StateId state_; // Matcher state.
Weight phi_weight_; // Product of the weights of phi transitions taken.
bool phi_loop_; // When true, phi self-loop are allowed and treated
// as rho (required for Aho-Corasick).
bool error_; // Error encountered?
PhiMatcher &operator=(const PhiMatcher &) = delete; };
template <class M> inline bool PhiMatcher<M>::Find(Label label) { if (label == phi_label_ && phi_label_ != kNoLabel && phi_label_ != 0) { FSTERROR() << "PhiMatcher::Find: bad label (phi): " << phi_label_; error_ = true; return false; } matcher_->SetState(state_); phi_match_ = kNoLabel; phi_weight_ = Weight::One(); // If phi_label_ == 0, there are no more true epsilon arcs.
if (phi_label_ == 0) { if (label == kNoLabel) { return false; } if (label == 0) { // but a virtual epsilon loop needs to be returned.
if (!matcher_->Find(kNoLabel)) { return matcher_->Find(0); } else { phi_match_ = 0; return true; } } } if (!has_phi_ || label == 0 || label == kNoLabel) { return matcher_->Find(label); } auto s = state_; while (!matcher_->Find(label)) { // Look for phi transition (if phi_label_ == 0, we need to look
// for -1 to avoid getting the virtual self-loop)
if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) return false; if (phi_loop_ && matcher_->Value().nextstate == s) { phi_match_ = label; return true; } phi_weight_ = Times(phi_weight_, matcher_->Value().weight); s = matcher_->Value().nextstate; matcher_->Next(); if (!matcher_->Done()) { FSTERROR() << "PhiMatcher: Phi non-determinism not supported"; error_ = true; } matcher_->SetState(s); } return true; }
template <class M> inline uint64_t PhiMatcher<M>::Properties(uint64_t inprops) const { auto outprops = matcher_->Properties(inprops); if (error_) outprops |= kError; if (match_type_ == MATCH_NONE) { return outprops; } else if (match_type_ == MATCH_INPUT) { if (phi_label_ == 0) { outprops &= ~(kEpsilons | kIEpsilons | kOEpsilons); outprops |= kNoEpsilons | kNoIEpsilons; } if (rewrite_both_) { return outprops & ~(kODeterministic | kNonODeterministic | kString | kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted); } else { return outprops & ~(kODeterministic | kAcceptor | kString | kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted); } } else if (match_type_ == MATCH_OUTPUT) { if (phi_label_ == 0) { outprops &= ~(kEpsilons | kIEpsilons | kOEpsilons); outprops |= kNoEpsilons | kNoOEpsilons; } if (rewrite_both_) { return outprops & ~(kIDeterministic | kNonIDeterministic | kString | kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted); } else { return outprops & ~(kIDeterministic | kAcceptor | kString | kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted); } } else { // Shouldn't ever get here.
FSTERROR() << "PhiMatcher: Bad match type: " << match_type_; return 0; } }
// For any requested label that doesn't match at a state, this matcher
// considers all transitions that match the label 'rho_label' (rho =
// 'rest'). Each such rho transition found is returned with the
// rho_label rewritten as the requested label (both sides if an
// acceptor, or if 'rewrite_both' is true and both input and output
// labels of the found transition are 'rho_label'). If 'rho_label' is
// kNoLabel, this special matching is not done. RhoMatcher is
// templated itself on a matcher, which is used to perform the
// underlying matching. By default, the underlying matcher is
// constructed by RhoMatcher. The user can instead pass in this
// object; in that case, RhoMatcher takes its ownership.
// No non-consuming symbols other than epsilon supported with
// the underlying template argument matcher.
template <class M> class RhoMatcher : public MatcherBase<typename M::Arc> { public: using FST = typename M::FST; using Arc = typename FST::Arc; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight;
// This makes a copy of the FST (w/o 'matcher' arg).
RhoMatcher(const FST &fst, MatchType match_type, Label rho_label = kNoLabel, MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, M *matcher = nullptr) : matcher_(matcher ? matcher : new M(fst, match_type)), match_type_(match_type), rho_label_(rho_label), error_(false), state_(kNoStateId), has_rho_(false) { if (match_type == MATCH_BOTH) { FSTERROR() << "RhoMatcher: Bad match type"; match_type_ = MATCH_NONE; error_ = true; } if (rho_label == 0) { FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label"; rho_label_ = kNoLabel; error_ = true; } if (rewrite_mode == MATCHER_REWRITE_AUTO) { rewrite_both_ = fst.Properties(kAcceptor, true); } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) { rewrite_both_ = true; } else { rewrite_both_ = false; } }
// This doesn't copy the FST.
RhoMatcher(const FST *fst, MatchType match_type, Label rho_label = kNoLabel, MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, M *matcher = nullptr) : RhoMatcher(*fst, match_type, rho_label, rewrite_mode, matcher ? matcher : new M(fst, match_type)) {}
// This makes a copy of the FST.
RhoMatcher(const RhoMatcher &matcher, bool safe = false) : matcher_(new M(*matcher.matcher_, safe)), match_type_(matcher.match_type_), rho_label_(matcher.rho_label_), rewrite_both_(matcher.rewrite_both_), error_(matcher.error_), state_(kNoStateId), has_rho_(false) {}
RhoMatcher *Copy(bool safe = false) const override { return new RhoMatcher(*this, safe); }
MatchType Type(bool test) const override { return matcher_->Type(test); }
void SetState(StateId s) final { if (state_ == s) return; state_ = s; matcher_->SetState(s); has_rho_ = rho_label_ != kNoLabel; }
bool Find(Label label) final { if (label == rho_label_ && rho_label_ != kNoLabel) { FSTERROR() << "RhoMatcher::Find: bad label (rho)"; error_ = true; return false; } if (matcher_->Find(label)) { rho_match_ = kNoLabel; return true; } else if (has_rho_ && label != 0 && label != kNoLabel && (has_rho_ = matcher_->Find(rho_label_))) { rho_match_ = label; return true; } else { return false; } }
bool Done() const final { return matcher_->Done(); }
const Arc &Value() const final { if (rho_match_ == kNoLabel) { return matcher_->Value(); } else { rho_arc_ = matcher_->Value(); if (rewrite_both_) { if (rho_arc_.ilabel == rho_label_) rho_arc_.ilabel = rho_match_; if (rho_arc_.olabel == rho_label_) rho_arc_.olabel = rho_match_; } else if (match_type_ == MATCH_INPUT) { rho_arc_.ilabel = rho_match_; } else { rho_arc_.olabel = rho_match_; } return rho_arc_; } }
void Next() final { matcher_->Next(); }
Weight Final(StateId s) const final { return matcher_->Final(s); }
ssize_t Priority(StateId s) final { state_ = s; matcher_->SetState(s); has_rho_ = matcher_->Find(rho_label_); if (has_rho_) { return kRequirePriority; } else { return matcher_->Priority(s); } }
const FST &GetFst() const override { return matcher_->GetFst(); }
uint64_t Properties(uint64_t props) const override;
uint32_t Flags() const override { if (rho_label_ == kNoLabel || match_type_ == MATCH_NONE) { return matcher_->Flags(); } return matcher_->Flags() | kRequireMatch; }
Label RhoLabel() const { return rho_label_; }
private: std::unique_ptr<M> matcher_; MatchType match_type_; // Type of match requested.
Label rho_label_; // Label that represents the rho transition
bool rewrite_both_; // Rewrite both sides when both are rho_label_?
Label rho_match_; // Current label that matches rho transition.
mutable Arc rho_arc_; // Arc to return when rho match.
bool error_; // Error encountered?
StateId state_; // Matcher state.
bool has_rho_; // Are there possibly rhos at the current state?
};
template <class M> inline uint64_t RhoMatcher<M>::Properties(uint64_t inprops) const { auto outprops = matcher_->Properties(inprops); if (error_) outprops |= kError; if (match_type_ == MATCH_NONE) { return outprops; } else if (match_type_ == MATCH_INPUT) { if (rewrite_both_) { return outprops & ~(kODeterministic | kNonODeterministic | kString | kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted); } else { return outprops & ~(kODeterministic | kAcceptor | kString | kILabelSorted | kNotILabelSorted); } } else if (match_type_ == MATCH_OUTPUT) { if (rewrite_both_) { return outprops & ~(kIDeterministic | kNonIDeterministic | kString | kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted); } else { return outprops & ~(kIDeterministic | kAcceptor | kString | kOLabelSorted | kNotOLabelSorted); } } else { // Shouldn't ever get here.
FSTERROR() << "RhoMatcher: Bad match type: " << match_type_; return 0; } }
// For any requested label, this matcher considers all transitions
// that match the label 'sigma_label' (sigma = "any"), and this in
// additions to transitions with the requested label. Each such sigma
// transition found is returned with the sigma_label rewritten as the
// requested label (both sides if an acceptor, or if 'rewrite_both' is
// true and both input and output labels of the found transition are
// 'sigma_label'). If 'sigma_label' is kNoLabel, this special
// matching is not done. SigmaMatcher is templated itself on a
// matcher, which is used to perform the underlying matching. By
// default, the underlying matcher is constructed by SigmaMatcher.
// The user can instead pass in this object; in that case,
// SigmaMatcher takes its ownership. No non-consuming symbols other
// than epsilon supported with the underlying template argument matcher.
template <class M> class SigmaMatcher : public MatcherBase<typename M::Arc> { public: using FST = typename M::FST; using Arc = typename FST::Arc; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight;
// This makes a copy of the FST (w/o 'matcher' arg).
SigmaMatcher(const FST &fst, MatchType match_type, Label sigma_label = kNoLabel, MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, M *matcher = nullptr) : matcher_(matcher ? matcher : new M(fst, match_type)), match_type_(match_type), sigma_label_(sigma_label), error_(false), state_(kNoStateId) { if (match_type == MATCH_BOTH) { FSTERROR() << "SigmaMatcher: Bad match type"; match_type_ = MATCH_NONE; error_ = true; } if (sigma_label == 0) { FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label"; sigma_label_ = kNoLabel; error_ = true; } if (rewrite_mode == MATCHER_REWRITE_AUTO) { rewrite_both_ = fst.Properties(kAcceptor, true); } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) { rewrite_both_ = true; } else { rewrite_both_ = false; } }
// This doesn't copy the FST.
SigmaMatcher(const FST *fst, MatchType match_type, Label sigma_label = kNoLabel, MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, M *matcher = nullptr) : SigmaMatcher(*fst, match_type, sigma_label, rewrite_mode, matcher ? matcher : new M(fst, match_type)) {}
// This makes a copy of the FST.
SigmaMatcher(const SigmaMatcher &matcher, bool safe = false) : matcher_(new M(*matcher.matcher_, safe)), match_type_(matcher.match_type_), sigma_label_(matcher.sigma_label_), rewrite_both_(matcher.rewrite_both_), error_(matcher.error_), state_(kNoStateId) {}
SigmaMatcher *Copy(bool safe = false) const override { return new SigmaMatcher(*this, safe); }
MatchType Type(bool test) const override { return matcher_->Type(test); }
void SetState(StateId s) final { if (state_ == s) return; state_ = s; matcher_->SetState(s); has_sigma_ = (sigma_label_ != kNoLabel) ? matcher_->Find(sigma_label_) : false; }
bool Find(Label match_label) final { match_label_ = match_label; if (match_label == sigma_label_ && sigma_label_ != kNoLabel) { FSTERROR() << "SigmaMatcher::Find: bad label (sigma)"; error_ = true; return false; } if (matcher_->Find(match_label)) { sigma_match_ = kNoLabel; return true; } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel && matcher_->Find(sigma_label_)) { sigma_match_ = match_label; return true; } else { return false; } }
bool Done() const final { return matcher_->Done(); }
const Arc &Value() const final { if (sigma_match_ == kNoLabel) { return matcher_->Value(); } else { sigma_arc_ = matcher_->Value(); if (rewrite_both_) { if (sigma_arc_.ilabel == sigma_label_) sigma_arc_.ilabel = sigma_match_; if (sigma_arc_.olabel == sigma_label_) sigma_arc_.olabel = sigma_match_; } else if (match_type_ == MATCH_INPUT) { sigma_arc_.ilabel = sigma_match_; } else { sigma_arc_.olabel = sigma_match_; } return sigma_arc_; } }
void Next() final { matcher_->Next(); if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) && (match_label_ > 0)) { matcher_->Find(sigma_label_); sigma_match_ = match_label_; } }
Weight Final(StateId s) const final { return matcher_->Final(s); }
ssize_t Priority(StateId s) final { if (sigma_label_ != kNoLabel) { SetState(s); return has_sigma_ ? kRequirePriority : matcher_->Priority(s); } else { return matcher_->Priority(s); } }
const FST &GetFst() const override { return matcher_->GetFst(); }
uint64_t Properties(uint64_t props) const override;
uint32_t Flags() const override { if (sigma_label_ == kNoLabel || match_type_ == MATCH_NONE) { return matcher_->Flags(); } return matcher_->Flags() | kRequireMatch; }
Label SigmaLabel() const { return sigma_label_; }
private: std::unique_ptr<M> matcher_; MatchType match_type_; // Type of match requested.
Label sigma_label_; // Label that represents the sigma transition.
bool rewrite_both_; // Rewrite both sides when both are sigma_label_?
bool has_sigma_; // Are there sigmas at the current state?
Label sigma_match_; // Current label that matches sigma transition.
mutable Arc sigma_arc_; // Arc to return when sigma match.
Label match_label_; // Label being matched.
bool error_; // Error encountered?
StateId state_; // Matcher state.
};
template <class M> inline uint64_t SigmaMatcher<M>::Properties(uint64_t inprops) const { auto outprops = matcher_->Properties(inprops); if (error_) outprops |= kError; if (match_type_ == MATCH_NONE) { return outprops; } else if (rewrite_both_) { return outprops & ~(kIDeterministic | kNonIDeterministic | kODeterministic | kNonODeterministic | kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kString); } else if (match_type_ == MATCH_INPUT) { return outprops & ~(kIDeterministic | kNonIDeterministic | kODeterministic | kNonODeterministic | kILabelSorted | kNotILabelSorted | kString | kAcceptor); } else if (match_type_ == MATCH_OUTPUT) { return outprops & ~(kIDeterministic | kNonIDeterministic | kODeterministic | kNonODeterministic | kOLabelSorted | kNotOLabelSorted | kString | kAcceptor); } else { // Shouldn't ever get here.
FSTERROR() << "SigmaMatcher: Bad match type: " << match_type_; return 0; } }
// Flags for MultiEpsMatcher.
// Return multi-epsilon arcs for Find(kNoLabel).
inline constexpr uint32_t kMultiEpsList = 0x00000001;
// Return a kNolabel loop for Find(multi_eps).
inline constexpr uint32_t kMultiEpsLoop = 0x00000002;
// MultiEpsMatcher: allows treating multiple non-0 labels as
// non-consuming labels in addition to 0 that is always
// non-consuming. Precise behavior controlled by 'flags' argument. By
// default, the underlying matcher is constructed by
// MultiEpsMatcher. The user can instead pass in this object; in that
// case, MultiEpsMatcher takes its ownership iff 'own_matcher' is
// true.
template <class M> class MultiEpsMatcher { public: using FST = typename M::FST; using Arc = typename FST::Arc; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight;
// This makes a copy of the FST (w/o 'matcher' arg).
MultiEpsMatcher(const FST &fst, MatchType match_type, uint32_t flags = (kMultiEpsLoop | kMultiEpsList), M *matcher = nullptr, bool own_matcher = true) : matcher_(matcher ? matcher : new M(fst, match_type)), flags_(flags), own_matcher_(matcher ? own_matcher : true) { Init(match_type); }
// This doesn't copy the FST.
MultiEpsMatcher(const FST *fst, MatchType match_type, uint32_t flags = (kMultiEpsLoop | kMultiEpsList), M *matcher = nullptr, bool own_matcher = true) : matcher_(matcher ? matcher : new M(fst, match_type)), flags_(flags), own_matcher_(matcher ? own_matcher : true) { Init(match_type); }
// This makes a copy of the FST.
MultiEpsMatcher(const MultiEpsMatcher &matcher, bool safe = false) : matcher_(new M(*matcher.matcher_, safe)), flags_(matcher.flags_), own_matcher_(true), multi_eps_labels_(matcher.multi_eps_labels_), loop_(matcher.loop_) { loop_.nextstate = kNoStateId; }
~MultiEpsMatcher() { if (own_matcher_) delete matcher_; }
MultiEpsMatcher *Copy(bool safe = false) const { return new MultiEpsMatcher(*this, safe); }
MatchType Type(bool test) const { return matcher_->Type(test); }
void SetState(StateId state) { matcher_->SetState(state); loop_.nextstate = state; }
bool Find(Label label);
bool Done() const { return done_; }
const Arc &Value() const { return current_loop_ ? loop_ : matcher_->Value(); }
void Next() { if (!current_loop_) { matcher_->Next(); done_ = matcher_->Done(); if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) { ++multi_eps_iter_; while ((multi_eps_iter_ != multi_eps_labels_.End()) && !matcher_->Find(*multi_eps_iter_)) { ++multi_eps_iter_; } if (multi_eps_iter_ != multi_eps_labels_.End()) { done_ = false; } else { done_ = !matcher_->Find(kNoLabel); } } } else { done_ = true; } }
const FST &GetFst() const { return matcher_->GetFst(); }
uint64_t Properties(uint64_t props) const { return matcher_->Properties(props); }
const M *GetMatcher() const { return matcher_; }
Weight Final(StateId s) const { return matcher_->Final(s); }
uint32_t Flags() const { return matcher_->Flags(); }
ssize_t Priority(StateId s) { return matcher_->Priority(s); }
void AddMultiEpsLabel(Label label) { if (label == 0) { FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0"; } else { multi_eps_labels_.Insert(label); } }
void RemoveMultiEpsLabel(Label label) { if (label == 0) { FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0"; } else { multi_eps_labels_.Erase(label); } }
void ClearMultiEpsLabels() { multi_eps_labels_.Clear(); }
private: void Init(MatchType match_type) { if (match_type == MATCH_INPUT) { loop_.ilabel = kNoLabel; loop_.olabel = 0; } else { loop_.ilabel = 0; loop_.olabel = kNoLabel; } loop_.weight = Weight::One(); loop_.nextstate = kNoStateId; }
M *matcher_; uint32_t flags_; bool own_matcher_; // Does this class delete the matcher?
// Multi-eps label set.
CompactSet<Label, kNoLabel> multi_eps_labels_; typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_;
bool current_loop_; // Current arc is the implicit loop?
mutable Arc loop_; // For non-consuming symbols.
bool done_; // Matching done?
MultiEpsMatcher &operator=(const MultiEpsMatcher &) = delete; };
template <class M> inline bool MultiEpsMatcher<M>::Find(Label label) { multi_eps_iter_ = multi_eps_labels_.End(); current_loop_ = false; bool ret; if (label == 0) { ret = matcher_->Find(0); } else if (label == kNoLabel) { if (flags_ & kMultiEpsList) { // Returns all non-consuming arcs (including epsilon).
multi_eps_iter_ = multi_eps_labels_.Begin(); while ((multi_eps_iter_ != multi_eps_labels_.End()) && !matcher_->Find(*multi_eps_iter_)) { ++multi_eps_iter_; } if (multi_eps_iter_ != multi_eps_labels_.End()) { ret = true; } else { ret = matcher_->Find(kNoLabel); } } else { // Returns all epsilon arcs.
ret = matcher_->Find(kNoLabel); } } else if ((flags_ & kMultiEpsLoop) && multi_eps_labels_.Find(label) != multi_eps_labels_.End()) { // Returns implicit loop.
current_loop_ = true; ret = true; } else { ret = matcher_->Find(label); } done_ = !ret; return ret; }
// This class discards any implicit matches (e.g., the implicit epsilon
// self-loops in the SortedMatcher). Matchers are most often used in
// composition/intersection where the implicit matches are needed
// e.g. for epsilon processing. However, if a matcher is simply being
// used to look-up explicit label matches, this class saves the user
// from having to check for and discard the unwanted implicit matches
// themselves.
template <class M> class ExplicitMatcher : public MatcherBase<typename M::Arc> { public: using FST = typename M::FST; using Arc = typename FST::Arc; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight;
// This makes a copy of the FST.
ExplicitMatcher(const FST &fst, MatchType match_type, M *matcher = nullptr) : matcher_(matcher ? matcher : new M(fst, match_type)), match_type_(match_type), error_(false) {}
// This doesn't copy the FST.
ExplicitMatcher(const FST *fst, MatchType match_type, M *matcher = nullptr) : matcher_(matcher ? matcher : new M(fst, match_type)), match_type_(match_type), error_(false) {}
// This makes a copy of the FST.
ExplicitMatcher(const ExplicitMatcher &matcher, bool safe = false) : matcher_(new M(*matcher.matcher_, safe)), match_type_(matcher.match_type_), error_(matcher.error_) {}
ExplicitMatcher *Copy(bool safe = false) const override { return new ExplicitMatcher(*this, safe); }
MatchType Type(bool test) const override { return matcher_->Type(test); }
void SetState(StateId s) final { matcher_->SetState(s); }
bool Find(Label label) final { matcher_->Find(label); CheckArc(); return !Done(); }
bool Done() const final { return matcher_->Done(); }
const Arc &Value() const final { return matcher_->Value(); }
void Next() final { matcher_->Next(); CheckArc(); }
Weight Final(StateId s) const final { return matcher_->Final(s); }
ssize_t Priority(StateId s) final { return matcher_->Priority(s); }
const FST &GetFst() const final { return matcher_->GetFst(); }
uint64_t Properties(uint64_t inprops) const override { return matcher_->Properties(inprops); }
const M *GetMatcher() const { return matcher_.get(); }
uint32_t Flags() const override { return matcher_->Flags(); }
private: // Checks current arc if available and explicit. If not available, stops. If
// not explicit, checks next ones.
void CheckArc() { for (; !matcher_->Done(); matcher_->Next()) { const auto label = match_type_ == MATCH_INPUT ? matcher_->Value().ilabel : matcher_->Value().olabel; if (label != kNoLabel) return; } }
std::unique_ptr<M> matcher_; MatchType match_type_; // Type of match requested.
bool error_; // Error encountered?
};
// Generic matcher, templated on the FST definition.
//
// Here is a typical use:
//
// Matcher<StdFst> matcher(fst, MATCH_INPUT);
// matcher.SetState(state);
// if (matcher.Find(label))
// for (; !matcher.Done(); matcher.Next()) {
// auto &arc = matcher.Value();
// ...
// }
template <class F> class Matcher { public: using FST = F; using Arc = typename F::Arc; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight;
// This makes a copy of the FST.
Matcher(const FST &fst, MatchType match_type) : owned_fst_(fst.Copy()), base_(owned_fst_->InitMatcher(match_type)) { if (!base_) base_ = std::make_unique<SortedMatcher<FST>>(owned_fst_.get(), match_type); }
// This doesn't copy the FST.
Matcher(const FST *fst, MatchType match_type) : base_(fst->InitMatcher(match_type)) { if (!base_) base_ = std::make_unique<SortedMatcher<FST>>(fst, match_type); }
// This makes a copy of the FST.
Matcher(const Matcher &matcher, bool safe = false) : base_(matcher.base_->Copy(safe)) {}
// Takes ownership of the provided matcher.
explicit Matcher(MatcherBase<Arc> *base_matcher) : base_(base_matcher) {}
Matcher *Copy(bool safe = false) const { return new Matcher(*this, safe); }
MatchType Type(bool test) const { return base_->Type(test); }
void SetState(StateId s) { base_->SetState(s); }
bool Find(Label label) { return base_->Find(label); }
bool Done() const { return base_->Done(); }
const Arc &Value() const { return base_->Value(); }
void Next() { base_->Next(); }
const FST &GetFst() const { return down_cast<const FST &>(base_->GetFst()); }
uint64_t Properties(uint64_t props) const { return base_->Properties(props); }
Weight Final(StateId s) const { return base_->Final(s); }
uint32_t Flags() const { return base_->Flags() & kMatcherFlags; }
ssize_t Priority(StateId s) { return base_->Priority(s); }
private: std::unique_ptr<const FST> owned_fst_; std::unique_ptr<MatcherBase<Arc>> base_; };
} // namespace fst
#endif // FST_MATCHER_H_
|