|
// Copyright 2005-2024 Google LLC
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the 'License');
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an 'AS IS' BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
// See www.openfst.org for extensive documentation on this weighted
|
|
// finite-state transducer library.
|
|
//
|
|
// Classes for representing the mapping between state tuples and state IDs.
|
|
|
|
#ifndef FST_STATE_TABLE_H_
|
|
#define FST_STATE_TABLE_H_
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <cstddef>
|
|
#include <deque>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include <fst/log.h>
|
|
#include <fst/bi-table.h>
|
|
#include <fst/expanded-fst.h>
|
|
#include <fst/filter-state.h>
|
|
#include <fst/fst.h>
|
|
#include <fst/properties.h>
|
|
#include <fst/util.h>
|
|
|
|
namespace fst {
|
|
|
|
// State tables determine the bijective mapping between state tuples (e.g., in
|
|
// composition, triples of two FST states and a composition filter state) and
|
|
// their corresponding state IDs. They are classes, templated on state tuples,
|
|
// with the following interface:
|
|
//
|
|
// template <class T>
|
|
// class StateTable {
|
|
// public:
|
|
// using StateTuple = T;
|
|
//
|
|
// // Required constructors.
|
|
// StateTable();
|
|
//
|
|
// StateTable(const StateTable &);
|
|
//
|
|
// // Looks up state ID by tuple. If it doesn't exist, then add it.
|
|
// StateId FindState(const StateTuple &tuple);
|
|
//
|
|
// // Looks up state tuple by state ID.
|
|
// const StateTuple<StateId> &Tuple(StateId s) const;
|
|
//
|
|
// // # of stored tuples.
|
|
// StateId Size() const;
|
|
// };
|
|
//
|
|
// A state tuple has the form:
|
|
//
|
|
// template <class S>
|
|
// struct StateTuple {
|
|
// using StateId = S;
|
|
//
|
|
// // Required constructors.
|
|
//
|
|
// StateTuple();
|
|
//
|
|
// StateTuple(const StateTuple &tuple);
|
|
// };
|
|
|
|
// An implementation using a hash map for the tuple to state ID mapping. The
|
|
// state tuple T must support operator==.
|
|
template <class T, class H>
|
|
class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
|
|
public:
|
|
using StateTuple = T;
|
|
using StateId = typename StateTuple::StateId;
|
|
|
|
using HashBiTable<StateId, StateTuple, H>::FindId;
|
|
using HashBiTable<StateId, StateTuple, H>::FindEntry;
|
|
using HashBiTable<StateId, StateTuple, H>::Size;
|
|
|
|
HashStateTable() : HashBiTable<StateId, StateTuple, H>() {}
|
|
|
|
explicit HashStateTable(size_t table_size)
|
|
: HashBiTable<StateId, StateTuple, H>(table_size) {}
|
|
|
|
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
|
|
|
|
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
|
|
};
|
|
|
|
// An implementation using a hash map for the tuple to state ID mapping. The
|
|
// state tuple T must support operator==.
|
|
template <class T, class H>
|
|
class CompactHashStateTable
|
|
: public CompactHashBiTable<typename T::StateId, T, H> {
|
|
public:
|
|
using StateTuple = T;
|
|
using StateId = typename StateTuple::StateId;
|
|
|
|
using CompactHashBiTable<StateId, StateTuple, H>::FindId;
|
|
using CompactHashBiTable<StateId, StateTuple, H>::FindEntry;
|
|
using CompactHashBiTable<StateId, StateTuple, H>::Size;
|
|
|
|
CompactHashStateTable() : CompactHashBiTable<StateId, StateTuple, H>() {}
|
|
|
|
explicit CompactHashStateTable(size_t table_size)
|
|
: CompactHashBiTable<StateId, StateTuple, H>(table_size) {}
|
|
|
|
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
|
|
|
|
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
|
|
};
|
|
|
|
// An implementation using a vector for the tuple to state mapping. It is
|
|
// passed a fingerprint functor that should fingerprint tuples uniquely to an
|
|
// integer that can used as a vector index. Normally, VectorStateTable
|
|
// constructs the fingerprint functor. Alternately, the user can pass this
|
|
// object, in which case the table takes ownership.
|
|
template <class T, class FP>
|
|
class VectorStateTable : public VectorBiTable<typename T::StateId, T, FP> {
|
|
public:
|
|
using StateTuple = T;
|
|
using StateId = typename StateTuple::StateId;
|
|
|
|
using VectorBiTable<StateId, StateTuple, FP>::FindId;
|
|
using VectorBiTable<StateId, StateTuple, FP>::FindEntry;
|
|
using VectorBiTable<StateId, StateTuple, FP>::Size;
|
|
using VectorBiTable<StateId, StateTuple, FP>::Fingerprint;
|
|
|
|
explicit VectorStateTable(const FP &fingerprint = FP(), size_t table_size = 0)
|
|
: VectorBiTable<StateId, StateTuple, FP>(fingerprint, table_size) {}
|
|
|
|
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
|
|
|
|
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
|
|
};
|
|
|
|
// An implementation using a vector and a compact hash table. The selection
|
|
// functor returns true for tuples to be hashed in the vector. The fingerprint
|
|
// functor should fingerprint tuples uniquely to an integer that can be used as
|
|
// a vector index. A hash functor is used when hashing tuples into the compact
|
|
// hash table.
|
|
template <class T, class Select, class FP, class H>
|
|
class VectorHashStateTable
|
|
: public VectorHashBiTable<typename T::StateId, T, Select, FP, H> {
|
|
public:
|
|
using StateTuple = T;
|
|
using StateId = typename StateTuple::StateId;
|
|
|
|
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::FindId;
|
|
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::FindEntry;
|
|
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Size;
|
|
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Selector;
|
|
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Fingerprint;
|
|
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::HashFunction;
|
|
|
|
VectorHashStateTable(const Select &select, const FP &fingerprint,
|
|
const H &hash, size_t vector_size = 0,
|
|
size_t tuple_size = 0)
|
|
: VectorHashBiTable<StateId, StateTuple, Select, FP, H>(
|
|
select, fingerprint, hash, vector_size, tuple_size) {}
|
|
|
|
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
|
|
|
|
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
|
|
};
|
|
|
|
// An implementation using a hash map to map from tuples to state IDs. This
|
|
// version permits erasing of states. The state tuple's default constructor
|
|
// must produce a tuple that will never be seen and the table must suppor
|
|
// operator==.
|
|
template <class T, class H>
|
|
class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, H> {
|
|
public:
|
|
using StateTuple = T;
|
|
using StateId = typename StateTuple::StateId;
|
|
|
|
using ErasableBiTable<StateId, StateTuple, H>::FindId;
|
|
using ErasableBiTable<StateId, StateTuple, H>::FindEntry;
|
|
using ErasableBiTable<StateId, StateTuple, H>::Size;
|
|
using ErasableBiTable<StateId, StateTuple, H>::Erase;
|
|
|
|
ErasableStateTable() : ErasableBiTable<StateId, StateTuple, H>() {}
|
|
|
|
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
|
|
|
|
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
|
|
};
|
|
|
|
// The composition state table has the form:
|
|
//
|
|
// template <class Arc, class FilterState>
|
|
// class ComposeStateTable {
|
|
// public:
|
|
// using StateId = typename Arc::StateId;
|
|
//
|
|
// // Required constructors.
|
|
//
|
|
// ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
|
|
// ComposeStateTable(const ComposeStateTable<Arc, FilterState> &table);
|
|
//
|
|
// // Looks up a state ID by tuple, adding it if doesn't exist.
|
|
// StateId FindState(const StateTuple &tuple);
|
|
//
|
|
// // Looks up a tuple by state ID.
|
|
// const ComposeStateTuple<StateId> &Tuple(StateId s) const;
|
|
//
|
|
// // The number of stored tuples.
|
|
// StateId Size() const;
|
|
//
|
|
// // Return true if error was encountered.
|
|
// bool Error() const;
|
|
// };
|
|
//
|
|
// The following interface is used to represent the composition state.
|
|
//
|
|
// template <class S, class FS>
|
|
// class CompositionStateTuple {
|
|
// public:
|
|
// using StateId = typename StateId;
|
|
// using FS = FilterState;
|
|
//
|
|
// // Required constructors.
|
|
// StateTuple();
|
|
// StateTuple(StateId s1, StateId s2, const FilterState &fs);
|
|
//
|
|
// StateId StateId1() const;
|
|
// StateId StateId2() const;
|
|
//
|
|
// FilterState GetFilterState() const;
|
|
//
|
|
// std::pair<StateId, StateId> StatePair() const;
|
|
//
|
|
// size_t Hash() const;
|
|
//
|
|
// friend bool operator==(const StateTuple& x, const StateTuple &y);
|
|
// }
|
|
//
|
|
template <typename S, typename FS>
|
|
class DefaultComposeStateTuple {
|
|
public:
|
|
using StateId = S;
|
|
using FilterState = FS;
|
|
|
|
DefaultComposeStateTuple()
|
|
: state_pair_(kNoStateId, kNoStateId), fs_(FilterState::NoState()) {}
|
|
|
|
DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &fs)
|
|
: state_pair_(s1, s2), fs_(fs) {}
|
|
|
|
StateId StateId1() const { return state_pair_.first; }
|
|
|
|
StateId StateId2() const { return state_pair_.second; }
|
|
|
|
FilterState GetFilterState() const { return fs_; }
|
|
|
|
const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
|
|
|
|
friend bool operator==(const DefaultComposeStateTuple &x,
|
|
const DefaultComposeStateTuple &y) {
|
|
return (&x == &y) || (x.state_pair_ == y.state_pair_ && x.fs_ == y.fs_);
|
|
}
|
|
|
|
size_t Hash() const {
|
|
return static_cast<size_t>(StateId1()) +
|
|
static_cast<size_t>(StateId2()) * 7853u +
|
|
GetFilterState().Hash() * 7867u;
|
|
}
|
|
|
|
private:
|
|
std::pair<StateId, StateId> state_pair_;
|
|
FilterState fs_; // State of composition filter.
|
|
};
|
|
|
|
// Specialization for TrivialFilterState that does not explicitly store the
|
|
// filter state since it is always the unique non-blocking state.
|
|
template <typename S>
|
|
class DefaultComposeStateTuple<S, TrivialFilterState> {
|
|
public:
|
|
using StateId = S;
|
|
using FilterState = TrivialFilterState;
|
|
|
|
DefaultComposeStateTuple() : state_pair_(kNoStateId, kNoStateId) {}
|
|
|
|
DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &)
|
|
: state_pair_(s1, s2) {}
|
|
|
|
StateId StateId1() const { return state_pair_.first; }
|
|
|
|
StateId StateId2() const { return state_pair_.second; }
|
|
|
|
FilterState GetFilterState() const { return FilterState(true); }
|
|
|
|
const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
|
|
|
|
friend bool operator==(const DefaultComposeStateTuple &x,
|
|
const DefaultComposeStateTuple &y) {
|
|
return (&x == &y) || (x.state_pair_ == y.state_pair_);
|
|
}
|
|
|
|
size_t Hash() const { return StateId1() + StateId2() * size_t{7853}; }
|
|
|
|
private:
|
|
std::pair<StateId, StateId> state_pair_;
|
|
};
|
|
|
|
// Hashing of composition state tuples.
|
|
template <typename T>
|
|
class ComposeHash {
|
|
public:
|
|
size_t operator()(const T &t) const { return t.Hash(); }
|
|
};
|
|
|
|
// A HashStateTable over composition tuples.
|
|
template <typename Arc, typename FilterState,
|
|
typename StateTuple =
|
|
DefaultComposeStateTuple<typename Arc::StateId, FilterState>,
|
|
typename StateTable =
|
|
CompactHashStateTable<StateTuple, ComposeHash<StateTuple>>>
|
|
class GenericComposeStateTable : public StateTable {
|
|
public:
|
|
using StateId = typename Arc::StateId;
|
|
|
|
GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
|
|
|
|
GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
|
|
size_t table_size)
|
|
: StateTable(table_size) {}
|
|
|
|
constexpr bool Error() const { return false; }
|
|
|
|
private:
|
|
GenericComposeStateTable &operator=(const GenericComposeStateTable &table) =
|
|
delete;
|
|
};
|
|
|
|
// Fingerprint for general composition tuples.
|
|
template <typename StateTuple>
|
|
class ComposeFingerprint {
|
|
public:
|
|
using StateId = typename StateTuple::StateId;
|
|
|
|
// Required but suboptimal constructor.
|
|
ComposeFingerprint() : mult1_(8192), mult2_(8192) {
|
|
LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
|
|
}
|
|
|
|
// Constructor is provided the sizes of the input FSTs.
|
|
ComposeFingerprint(StateId nstates1, StateId nstates2)
|
|
: mult1_(nstates1), mult2_(nstates1 * nstates2) {}
|
|
|
|
size_t operator()(const StateTuple &tuple) const {
|
|
return tuple.StateId1() + tuple.StateId2() * mult1_ +
|
|
tuple.GetFilterState().Hash() * mult2_;
|
|
}
|
|
|
|
private:
|
|
const ssize_t mult1_;
|
|
const ssize_t mult2_;
|
|
};
|
|
|
|
// Useful when the first composition state determines the tuple.
|
|
template <typename StateTuple>
|
|
class ComposeState1Fingerprint {
|
|
public:
|
|
size_t operator()(const StateTuple &tuple) { return tuple.StateId1(); }
|
|
};
|
|
|
|
// Useful when the second composition state determines the tuple.
|
|
template <typename StateTuple>
|
|
class ComposeState2Fingerprint {
|
|
public:
|
|
size_t operator()(const StateTuple &tuple) { return tuple.StateId2(); }
|
|
};
|
|
|
|
// A VectorStateTable over composition tuples. This can be used when the
|
|
// product of number of states in FST1 and FST2 (and the composition filter
|
|
// state hash) is manageable. If the FSTs are not expanded FSTs, they will
|
|
// first have their states counted.
|
|
template <typename Arc, typename StateTuple>
|
|
class ProductComposeStateTable
|
|
: public VectorStateTable<StateTuple, ComposeFingerprint<StateTuple>> {
|
|
public:
|
|
using StateId = typename Arc::StateId;
|
|
using StateTable =
|
|
VectorStateTable<StateTuple, ComposeFingerprint<StateTuple>>;
|
|
|
|
ProductComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
|
|
size_t table_size = 0)
|
|
: StateTable(ComposeFingerprint<StateTuple>(CountStates(fst1),
|
|
CountStates(fst2)),
|
|
table_size) {}
|
|
|
|
ProductComposeStateTable(
|
|
const ProductComposeStateTable<Arc, StateTuple> &table)
|
|
: StateTable(ComposeFingerprint<StateTuple>(table.Fingerprint())) {}
|
|
|
|
constexpr bool Error() const { return false; }
|
|
|
|
private:
|
|
ProductComposeStateTable &operator=(const ProductComposeStateTable &table) =
|
|
delete;
|
|
};
|
|
|
|
// A vector-backed table over composition tuples which can be used when the
|
|
// first FST is a string (i.e., satisfies kString property) and the second is
|
|
// deterministic and epsilon-free. It should be used with a composition filter
|
|
// that creates at most one filter state per tuple under these conditions (e.g.,
|
|
// SequenceComposeFilter or MatchComposeFilter).
|
|
template <typename Arc, typename StateTuple>
|
|
class StringDetComposeStateTable
|
|
: public VectorStateTable<StateTuple,
|
|
ComposeState1Fingerprint<StateTuple>> {
|
|
public:
|
|
using StateId = typename Arc::StateId;
|
|
using StateTable =
|
|
VectorStateTable<StateTuple, ComposeState1Fingerprint<StateTuple>>;
|
|
|
|
StringDetComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
|
|
: error_(false) {
|
|
static constexpr auto props2 = kIDeterministic | kNoIEpsilons;
|
|
if (fst1.Properties(kString, true) != kString) {
|
|
FSTERROR() << "StringDetComposeStateTable: 1st FST is not a string";
|
|
error_ = true;
|
|
} else if (fst2.Properties(props2, true) != props2) {
|
|
FSTERROR() << "StringDetComposeStateTable: 2nd FST is not deterministic "
|
|
"and epsilon-free";
|
|
error_ = true;
|
|
}
|
|
}
|
|
|
|
StringDetComposeStateTable(
|
|
const StringDetComposeStateTable<Arc, StateTuple> &table)
|
|
: StateTable(table), error_(table.error_) {}
|
|
|
|
bool Error() const { return error_; }
|
|
|
|
private:
|
|
bool error_;
|
|
|
|
StringDetComposeStateTable &operator=(const StringDetComposeStateTable &) =
|
|
delete;
|
|
};
|
|
|
|
// A vector-backed table over composition tuples which can be used when the
|
|
// first FST is deterministic and epsilon-free and the second is a string (i.e.,
|
|
// satisfies kString). It should be used with a composition filter that creates
|
|
// at most one filter state per tuple under these conditions (e.g.,
|
|
// SequenceComposeFilter or MatchComposeFilter).
|
|
template <typename Arc, typename StateTuple>
|
|
class DetStringComposeStateTable
|
|
: public VectorStateTable<StateTuple,
|
|
ComposeState2Fingerprint<StateTuple>> {
|
|
public:
|
|
using StateId = typename Arc::StateId;
|
|
using StateTable =
|
|
VectorStateTable<StateTuple, ComposeState2Fingerprint<StateTuple>>;
|
|
|
|
DetStringComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
|
|
: error_(false) {
|
|
static constexpr auto props = kODeterministic | kNoOEpsilons;
|
|
if (fst1.Properties(props, true) != props) {
|
|
FSTERROR() << "StringDetComposeStateTable: 1st FST is not "
|
|
<< "input-deterministic and epsilon-free";
|
|
error_ = true;
|
|
} else if (fst2.Properties(kString, true) != kString) {
|
|
FSTERROR() << "DetStringComposeStateTable: 2nd FST is not a string";
|
|
error_ = true;
|
|
}
|
|
}
|
|
|
|
DetStringComposeStateTable(
|
|
const DetStringComposeStateTable<Arc, StateTuple> &table)
|
|
: StateTable(table), error_(table.error_) {}
|
|
|
|
bool Error() const { return error_; }
|
|
|
|
private:
|
|
bool error_;
|
|
|
|
DetStringComposeStateTable &operator=(const DetStringComposeStateTable &) =
|
|
delete;
|
|
};
|
|
|
|
// An erasable table over composition tuples. The Erase(StateId) method can be
|
|
// called if the user either is sure that composition will never return to that
|
|
// tuple or doesn't care that if it does, it is assigned a new state ID.
|
|
template <typename Arc, typename StateTuple>
|
|
class ErasableComposeStateTable
|
|
: public ErasableStateTable<StateTuple, ComposeHash<StateTuple>> {
|
|
public:
|
|
ErasableComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
|
|
|
|
constexpr bool Error() const { return false; }
|
|
|
|
private:
|
|
ErasableComposeStateTable &operator=(const ErasableComposeStateTable &table) =
|
|
delete;
|
|
};
|
|
|
|
} // namespace fst
|
|
|
|
#endif // FST_STATE_TABLE_H_
|