You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

512 lines
17 KiB

// Copyright 2005-2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the 'License');
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an 'AS IS' BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// See www.openfst.org for extensive documentation on this weighted
// finite-state transducer library.
//
// Classes for representing the mapping between state tuples and state IDs.
#ifndef FST_STATE_TABLE_H_
#define FST_STATE_TABLE_H_
#include <sys/types.h>
#include <cstddef>
#include <deque>
#include <utility>
#include <vector>
#include <fst/log.h>
#include <fst/bi-table.h>
#include <fst/expanded-fst.h>
#include <fst/filter-state.h>
#include <fst/fst.h>
#include <fst/properties.h>
#include <fst/util.h>
namespace fst {
// State tables determine the bijective mapping between state tuples (e.g., in
// composition, triples of two FST states and a composition filter state) and
// their corresponding state IDs. They are classes, templated on state tuples,
// with the following interface:
//
// template <class T>
// class StateTable {
// public:
// using StateTuple = T;
//
// // Required constructors.
// StateTable();
//
// StateTable(const StateTable &);
//
// // Looks up state ID by tuple. If it doesn't exist, then add it.
// StateId FindState(const StateTuple &tuple);
//
// // Looks up state tuple by state ID.
// const StateTuple<StateId> &Tuple(StateId s) const;
//
// // # of stored tuples.
// StateId Size() const;
// };
//
// A state tuple has the form:
//
// template <class S>
// struct StateTuple {
// using StateId = S;
//
// // Required constructors.
//
// StateTuple();
//
// StateTuple(const StateTuple &tuple);
// };
// An implementation using a hash map for the tuple to state ID mapping. The
// state tuple T must support operator==.
template <class T, class H>
class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
public:
using StateTuple = T;
using StateId = typename StateTuple::StateId;
using HashBiTable<StateId, StateTuple, H>::FindId;
using HashBiTable<StateId, StateTuple, H>::FindEntry;
using HashBiTable<StateId, StateTuple, H>::Size;
HashStateTable() : HashBiTable<StateId, StateTuple, H>() {}
explicit HashStateTable(size_t table_size)
: HashBiTable<StateId, StateTuple, H>(table_size) {}
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};
// An implementation using a hash map for the tuple to state ID mapping. The
// state tuple T must support operator==.
template <class T, class H>
class CompactHashStateTable
: public CompactHashBiTable<typename T::StateId, T, H> {
public:
using StateTuple = T;
using StateId = typename StateTuple::StateId;
using CompactHashBiTable<StateId, StateTuple, H>::FindId;
using CompactHashBiTable<StateId, StateTuple, H>::FindEntry;
using CompactHashBiTable<StateId, StateTuple, H>::Size;
CompactHashStateTable() : CompactHashBiTable<StateId, StateTuple, H>() {}
explicit CompactHashStateTable(size_t table_size)
: CompactHashBiTable<StateId, StateTuple, H>(table_size) {}
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};
// An implementation using a vector for the tuple to state mapping. It is
// passed a fingerprint functor that should fingerprint tuples uniquely to an
// integer that can used as a vector index. Normally, VectorStateTable
// constructs the fingerprint functor. Alternately, the user can pass this
// object, in which case the table takes ownership.
template <class T, class FP>
class VectorStateTable : public VectorBiTable<typename T::StateId, T, FP> {
public:
using StateTuple = T;
using StateId = typename StateTuple::StateId;
using VectorBiTable<StateId, StateTuple, FP>::FindId;
using VectorBiTable<StateId, StateTuple, FP>::FindEntry;
using VectorBiTable<StateId, StateTuple, FP>::Size;
using VectorBiTable<StateId, StateTuple, FP>::Fingerprint;
explicit VectorStateTable(const FP &fingerprint = FP(), size_t table_size = 0)
: VectorBiTable<StateId, StateTuple, FP>(fingerprint, table_size) {}
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};
// An implementation using a vector and a compact hash table. The selection
// functor returns true for tuples to be hashed in the vector. The fingerprint
// functor should fingerprint tuples uniquely to an integer that can be used as
// a vector index. A hash functor is used when hashing tuples into the compact
// hash table.
template <class T, class Select, class FP, class H>
class VectorHashStateTable
: public VectorHashBiTable<typename T::StateId, T, Select, FP, H> {
public:
using StateTuple = T;
using StateId = typename StateTuple::StateId;
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::FindId;
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::FindEntry;
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Size;
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Selector;
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Fingerprint;
using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::HashFunction;
VectorHashStateTable(const Select &select, const FP &fingerprint,
const H &hash, size_t vector_size = 0,
size_t tuple_size = 0)
: VectorHashBiTable<StateId, StateTuple, Select, FP, H>(
select, fingerprint, hash, vector_size, tuple_size) {}
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};
// An implementation using a hash map to map from tuples to state IDs. This
// version permits erasing of states. The state tuple's default constructor
// must produce a tuple that will never be seen and the table must suppor
// operator==.
template <class T, class H>
class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, H> {
public:
using StateTuple = T;
using StateId = typename StateTuple::StateId;
using ErasableBiTable<StateId, StateTuple, H>::FindId;
using ErasableBiTable<StateId, StateTuple, H>::FindEntry;
using ErasableBiTable<StateId, StateTuple, H>::Size;
using ErasableBiTable<StateId, StateTuple, H>::Erase;
ErasableStateTable() : ErasableBiTable<StateId, StateTuple, H>() {}
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};
// The composition state table has the form:
//
// template <class Arc, class FilterState>
// class ComposeStateTable {
// public:
// using StateId = typename Arc::StateId;
//
// // Required constructors.
//
// ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
// ComposeStateTable(const ComposeStateTable<Arc, FilterState> &table);
//
// // Looks up a state ID by tuple, adding it if doesn't exist.
// StateId FindState(const StateTuple &tuple);
//
// // Looks up a tuple by state ID.
// const ComposeStateTuple<StateId> &Tuple(StateId s) const;
//
// // The number of stored tuples.
// StateId Size() const;
//
// // Return true if error was encountered.
// bool Error() const;
// };
//
// The following interface is used to represent the composition state.
//
// template <class S, class FS>
// class CompositionStateTuple {
// public:
// using StateId = typename StateId;
// using FS = FilterState;
//
// // Required constructors.
// StateTuple();
// StateTuple(StateId s1, StateId s2, const FilterState &fs);
//
// StateId StateId1() const;
// StateId StateId2() const;
//
// FilterState GetFilterState() const;
//
// std::pair<StateId, StateId> StatePair() const;
//
// size_t Hash() const;
//
// friend bool operator==(const StateTuple& x, const StateTuple &y);
// }
//
template <typename S, typename FS>
class DefaultComposeStateTuple {
public:
using StateId = S;
using FilterState = FS;
DefaultComposeStateTuple()
: state_pair_(kNoStateId, kNoStateId), fs_(FilterState::NoState()) {}
DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &fs)
: state_pair_(s1, s2), fs_(fs) {}
StateId StateId1() const { return state_pair_.first; }
StateId StateId2() const { return state_pair_.second; }
FilterState GetFilterState() const { return fs_; }
const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
friend bool operator==(const DefaultComposeStateTuple &x,
const DefaultComposeStateTuple &y) {
return (&x == &y) || (x.state_pair_ == y.state_pair_ && x.fs_ == y.fs_);
}
size_t Hash() const {
return static_cast<size_t>(StateId1()) +
static_cast<size_t>(StateId2()) * 7853u +
GetFilterState().Hash() * 7867u;
}
private:
std::pair<StateId, StateId> state_pair_;
FilterState fs_; // State of composition filter.
};
// Specialization for TrivialFilterState that does not explicitly store the
// filter state since it is always the unique non-blocking state.
template <typename S>
class DefaultComposeStateTuple<S, TrivialFilterState> {
public:
using StateId = S;
using FilterState = TrivialFilterState;
DefaultComposeStateTuple() : state_pair_(kNoStateId, kNoStateId) {}
DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &)
: state_pair_(s1, s2) {}
StateId StateId1() const { return state_pair_.first; }
StateId StateId2() const { return state_pair_.second; }
FilterState GetFilterState() const { return FilterState(true); }
const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
friend bool operator==(const DefaultComposeStateTuple &x,
const DefaultComposeStateTuple &y) {
return (&x == &y) || (x.state_pair_ == y.state_pair_);
}
size_t Hash() const { return StateId1() + StateId2() * size_t{7853}; }
private:
std::pair<StateId, StateId> state_pair_;
};
// Hashing of composition state tuples.
template <typename T>
class ComposeHash {
public:
size_t operator()(const T &t) const { return t.Hash(); }
};
// A HashStateTable over composition tuples.
template <typename Arc, typename FilterState,
typename StateTuple =
DefaultComposeStateTuple<typename Arc::StateId, FilterState>,
typename StateTable =
CompactHashStateTable<StateTuple, ComposeHash<StateTuple>>>
class GenericComposeStateTable : public StateTable {
public:
using StateId = typename Arc::StateId;
GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
size_t table_size)
: StateTable(table_size) {}
constexpr bool Error() const { return false; }
private:
GenericComposeStateTable &operator=(const GenericComposeStateTable &table) =
delete;
};
// Fingerprint for general composition tuples.
template <typename StateTuple>
class ComposeFingerprint {
public:
using StateId = typename StateTuple::StateId;
// Required but suboptimal constructor.
ComposeFingerprint() : mult1_(8192), mult2_(8192) {
LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
}
// Constructor is provided the sizes of the input FSTs.
ComposeFingerprint(StateId nstates1, StateId nstates2)
: mult1_(nstates1), mult2_(nstates1 * nstates2) {}
size_t operator()(const StateTuple &tuple) const {
return tuple.StateId1() + tuple.StateId2() * mult1_ +
tuple.GetFilterState().Hash() * mult2_;
}
private:
const ssize_t mult1_;
const ssize_t mult2_;
};
// Useful when the first composition state determines the tuple.
template <typename StateTuple>
class ComposeState1Fingerprint {
public:
size_t operator()(const StateTuple &tuple) { return tuple.StateId1(); }
};
// Useful when the second composition state determines the tuple.
template <typename StateTuple>
class ComposeState2Fingerprint {
public:
size_t operator()(const StateTuple &tuple) { return tuple.StateId2(); }
};
// A VectorStateTable over composition tuples. This can be used when the
// product of number of states in FST1 and FST2 (and the composition filter
// state hash) is manageable. If the FSTs are not expanded FSTs, they will
// first have their states counted.
template <typename Arc, typename StateTuple>
class ProductComposeStateTable
: public VectorStateTable<StateTuple, ComposeFingerprint<StateTuple>> {
public:
using StateId = typename Arc::StateId;
using StateTable =
VectorStateTable<StateTuple, ComposeFingerprint<StateTuple>>;
ProductComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
size_t table_size = 0)
: StateTable(ComposeFingerprint<StateTuple>(CountStates(fst1),
CountStates(fst2)),
table_size) {}
ProductComposeStateTable(
const ProductComposeStateTable<Arc, StateTuple> &table)
: StateTable(ComposeFingerprint<StateTuple>(table.Fingerprint())) {}
constexpr bool Error() const { return false; }
private:
ProductComposeStateTable &operator=(const ProductComposeStateTable &table) =
delete;
};
// A vector-backed table over composition tuples which can be used when the
// first FST is a string (i.e., satisfies kString property) and the second is
// deterministic and epsilon-free. It should be used with a composition filter
// that creates at most one filter state per tuple under these conditions (e.g.,
// SequenceComposeFilter or MatchComposeFilter).
template <typename Arc, typename StateTuple>
class StringDetComposeStateTable
: public VectorStateTable<StateTuple,
ComposeState1Fingerprint<StateTuple>> {
public:
using StateId = typename Arc::StateId;
using StateTable =
VectorStateTable<StateTuple, ComposeState1Fingerprint<StateTuple>>;
StringDetComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
: error_(false) {
static constexpr auto props2 = kIDeterministic | kNoIEpsilons;
if (fst1.Properties(kString, true) != kString) {
FSTERROR() << "StringDetComposeStateTable: 1st FST is not a string";
error_ = true;
} else if (fst2.Properties(props2, true) != props2) {
FSTERROR() << "StringDetComposeStateTable: 2nd FST is not deterministic "
"and epsilon-free";
error_ = true;
}
}
StringDetComposeStateTable(
const StringDetComposeStateTable<Arc, StateTuple> &table)
: StateTable(table), error_(table.error_) {}
bool Error() const { return error_; }
private:
bool error_;
StringDetComposeStateTable &operator=(const StringDetComposeStateTable &) =
delete;
};
// A vector-backed table over composition tuples which can be used when the
// first FST is deterministic and epsilon-free and the second is a string (i.e.,
// satisfies kString). It should be used with a composition filter that creates
// at most one filter state per tuple under these conditions (e.g.,
// SequenceComposeFilter or MatchComposeFilter).
template <typename Arc, typename StateTuple>
class DetStringComposeStateTable
: public VectorStateTable<StateTuple,
ComposeState2Fingerprint<StateTuple>> {
public:
using StateId = typename Arc::StateId;
using StateTable =
VectorStateTable<StateTuple, ComposeState2Fingerprint<StateTuple>>;
DetStringComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
: error_(false) {
static constexpr auto props = kODeterministic | kNoOEpsilons;
if (fst1.Properties(props, true) != props) {
FSTERROR() << "StringDetComposeStateTable: 1st FST is not "
<< "input-deterministic and epsilon-free";
error_ = true;
} else if (fst2.Properties(kString, true) != kString) {
FSTERROR() << "DetStringComposeStateTable: 2nd FST is not a string";
error_ = true;
}
}
DetStringComposeStateTable(
const DetStringComposeStateTable<Arc, StateTuple> &table)
: StateTable(table), error_(table.error_) {}
bool Error() const { return error_; }
private:
bool error_;
DetStringComposeStateTable &operator=(const DetStringComposeStateTable &) =
delete;
};
// An erasable table over composition tuples. The Erase(StateId) method can be
// called if the user either is sure that composition will never return to that
// tuple or doesn't care that if it does, it is assigned a new state ID.
template <typename Arc, typename StateTuple>
class ErasableComposeStateTable
: public ErasableStateTable<StateTuple, ComposeHash<StateTuple>> {
public:
ErasableComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
constexpr bool Error() const { return false; }
private:
ErasableComposeStateTable &operator=(const ErasableComposeStateTable &table) =
delete;
};
} // namespace fst
#endif // FST_STATE_TABLE_H_