You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

512 lines
17 KiB

  1. // Copyright 2005-2024 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the 'License');
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an 'AS IS' BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // See www.openfst.org for extensive documentation on this weighted
  16. // finite-state transducer library.
  17. //
  18. // Classes for representing the mapping between state tuples and state IDs.
  19. #ifndef FST_STATE_TABLE_H_
  20. #define FST_STATE_TABLE_H_
  21. #include <sys/types.h>
  22. #include <cstddef>
  23. #include <deque>
  24. #include <utility>
  25. #include <vector>
  26. #include <fst/log.h>
  27. #include <fst/bi-table.h>
  28. #include <fst/expanded-fst.h>
  29. #include <fst/filter-state.h>
  30. #include <fst/fst.h>
  31. #include <fst/properties.h>
  32. #include <fst/util.h>
  33. namespace fst {
  34. // State tables determine the bijective mapping between state tuples (e.g., in
  35. // composition, triples of two FST states and a composition filter state) and
  36. // their corresponding state IDs. They are classes, templated on state tuples,
  37. // with the following interface:
  38. //
  39. // template <class T>
  40. // class StateTable {
  41. // public:
  42. // using StateTuple = T;
  43. //
  44. // // Required constructors.
  45. // StateTable();
  46. //
  47. // StateTable(const StateTable &);
  48. //
  49. // // Looks up state ID by tuple. If it doesn't exist, then add it.
  50. // StateId FindState(const StateTuple &tuple);
  51. //
  52. // // Looks up state tuple by state ID.
  53. // const StateTuple &Tuple(StateId s) const;
  54. //
  55. // // # of stored tuples.
  56. // StateId Size() const;
  57. // };
  58. //
  59. // A state tuple has the form:
  60. //
  61. // template <class S>
  62. // struct StateTuple {
  63. // using StateId = S;
  64. //
  65. // // Required constructors.
  66. //
  67. // StateTuple();
  68. //
  69. // StateTuple(const StateTuple &tuple);
  70. // };
  71. // An implementation using a hash map for the tuple to state ID mapping. The
  72. // state tuple T must support operator==.
  73. template <class T, class H>
  74. class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
  75. public:
  76. using StateTuple = T;
  77. using StateId = typename StateTuple::StateId;
  78. using HashBiTable<StateId, StateTuple, H>::FindId;
  79. using HashBiTable<StateId, StateTuple, H>::FindEntry;
  80. using HashBiTable<StateId, StateTuple, H>::Size;
  81. HashStateTable() : HashBiTable<StateId, StateTuple, H>() {}
  82. explicit HashStateTable(size_t table_size)
  83. : HashBiTable<StateId, StateTuple, H>(table_size) {}
  84. StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
  85. const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
  86. };
  87. // An implementation using a hash map for the tuple to state ID mapping. The
  88. // state tuple T must support operator==.
  89. template <class T, class H>
  90. class CompactHashStateTable
  91. : public CompactHashBiTable<typename T::StateId, T, H> {
  92. public:
  93. using StateTuple = T;
  94. using StateId = typename StateTuple::StateId;
  95. using CompactHashBiTable<StateId, StateTuple, H>::FindId;
  96. using CompactHashBiTable<StateId, StateTuple, H>::FindEntry;
  97. using CompactHashBiTable<StateId, StateTuple, H>::Size;
  98. CompactHashStateTable() : CompactHashBiTable<StateId, StateTuple, H>() {}
  99. explicit CompactHashStateTable(size_t table_size)
  100. : CompactHashBiTable<StateId, StateTuple, H>(table_size) {}
  101. StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
  102. const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
  103. };
  104. // An implementation using a vector for the tuple to state mapping. It is
  105. // passed a fingerprint functor that should fingerprint tuples uniquely to an
  106. // integer that can used as a vector index. Normally, VectorStateTable
  107. // constructs the fingerprint functor. Alternately, the user can pass this
  108. // object, in which case the table takes ownership.
  109. template <class T, class FP>
  110. class VectorStateTable : public VectorBiTable<typename T::StateId, T, FP> {
  111. public:
  112. using StateTuple = T;
  113. using StateId = typename StateTuple::StateId;
  114. using VectorBiTable<StateId, StateTuple, FP>::FindId;
  115. using VectorBiTable<StateId, StateTuple, FP>::FindEntry;
  116. using VectorBiTable<StateId, StateTuple, FP>::Size;
  117. using VectorBiTable<StateId, StateTuple, FP>::Fingerprint;
  118. explicit VectorStateTable(const FP &fingerprint = FP(), size_t table_size = 0)
  119. : VectorBiTable<StateId, StateTuple, FP>(fingerprint, table_size) {}
  120. StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
  121. const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
  122. };
  123. // An implementation using a vector and a compact hash table. The selection
  124. // functor returns true for tuples to be hashed in the vector. The fingerprint
  125. // functor should fingerprint tuples uniquely to an integer that can be used as
  126. // a vector index. A hash functor is used when hashing tuples into the compact
  127. // hash table.
  128. template <class T, class Select, class FP, class H>
  129. class VectorHashStateTable
  130. : public VectorHashBiTable<typename T::StateId, T, Select, FP, H> {
  131. public:
  132. using StateTuple = T;
  133. using StateId = typename StateTuple::StateId;
  134. using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::FindId;
  135. using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::FindEntry;
  136. using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Size;
  137. using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Selector;
  138. using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Fingerprint;
  139. using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::HashFunction;
  140. VectorHashStateTable(const Select &select, const FP &fingerprint,
  141. const H &hash, size_t vector_size = 0,
  142. size_t tuple_size = 0)
  143. : VectorHashBiTable<StateId, StateTuple, Select, FP, H>(
  144. select, fingerprint, hash, vector_size, tuple_size) {}
  145. StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
  146. const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
  147. };
  148. // An implementation using a hash map to map from tuples to state IDs. This
  149. // version permits erasing of states. The state tuple's default constructor
  150. // must produce a tuple that will never be seen and the table must suppor
  151. // operator==.
  152. template <class T, class H>
  153. class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, H> {
  154. public:
  155. using StateTuple = T;
  156. using StateId = typename StateTuple::StateId;
  157. using ErasableBiTable<StateId, StateTuple, H>::FindId;
  158. using ErasableBiTable<StateId, StateTuple, H>::FindEntry;
  159. using ErasableBiTable<StateId, StateTuple, H>::Size;
  160. using ErasableBiTable<StateId, StateTuple, H>::Erase;
  161. ErasableStateTable() : ErasableBiTable<StateId, StateTuple, H>() {}
  162. StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
  163. const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
  164. };
  165. // The composition state table has the form:
  166. //
  167. // template <class Arc, class FilterState>
  168. // class ComposeStateTable {
  169. // public:
  170. // using StateId = typename Arc::StateId;
  171. //
  172. // // Required constructors.
  173. //
  174. // ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
  175. // ComposeStateTable(const ComposeStateTable<Arc, FilterState> &table);
  176. //
  177. // // Looks up a state ID by tuple, adding it if doesn't exist.
  178. // StateId FindState(const StateTuple &tuple);
  179. //
  180. // // Looks up a tuple by state ID.
  181. // const ComposeStateTuple<StateId> &Tuple(StateId s) const;
  182. //
  183. // // The number of stored tuples.
  184. // StateId Size() const;
  185. //
  186. // // Return true if error was encountered.
  187. // bool Error() const;
  188. // };
  189. //
  190. // The following interface is used to represent the composition state.
  191. //
  192. // template <class S, class FS>
  193. // class CompositionStateTuple {
  194. // public:
  195. // using StateId = S;
  196. // using FilterState = FS;
  197. //
  198. // // Required constructors.
  199. // StateTuple();
  200. // StateTuple(StateId s1, StateId s2, const FilterState &fs);
  201. //
  202. // StateId StateId1() const;
  203. // StateId StateId2() const;
  204. //
  205. // FilterState GetFilterState() const;
  206. //
  207. // std::pair<StateId, StateId> StatePair() const;
  208. //
  209. // size_t Hash() const;
  210. //
  211. // friend bool operator==(const StateTuple& x, const StateTuple &y);
  212. // }
  213. //
  214. template <typename S, typename FS>
  215. class DefaultComposeStateTuple {
  216. public:
  217. using StateId = S;
  218. using FilterState = FS;
  219. DefaultComposeStateTuple()
  220. : state_pair_(kNoStateId, kNoStateId), fs_(FilterState::NoState()) {}
  221. DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &fs)
  222. : state_pair_(s1, s2), fs_(fs) {}
  223. StateId StateId1() const { return state_pair_.first; }
  224. StateId StateId2() const { return state_pair_.second; }
  225. FilterState GetFilterState() const { return fs_; }
  226. const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
  227. friend bool operator==(const DefaultComposeStateTuple &x,
  228. const DefaultComposeStateTuple &y) {
  229. return (&x == &y) || (x.state_pair_ == y.state_pair_ && x.fs_ == y.fs_);
  230. }
  231. size_t Hash() const {
  232. return static_cast<size_t>(StateId1()) +
  233. static_cast<size_t>(StateId2()) * 7853u +
  234. GetFilterState().Hash() * 7867u;
  235. }
  236. private:
  237. std::pair<StateId, StateId> state_pair_;
  238. FilterState fs_; // State of composition filter.
  239. };
  240. // Specialization for TrivialFilterState that does not explicitly store the
  241. // filter state since it is always the unique non-blocking state.
  242. template <typename S>
  243. class DefaultComposeStateTuple<S, TrivialFilterState> {
  244. public:
  245. using StateId = S;
  246. using FilterState = TrivialFilterState;
  247. DefaultComposeStateTuple() : state_pair_(kNoStateId, kNoStateId) {}
  248. DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &)
  249. : state_pair_(s1, s2) {}
  250. StateId StateId1() const { return state_pair_.first; }
  251. StateId StateId2() const { return state_pair_.second; }
  252. FilterState GetFilterState() const { return FilterState(true); }
  253. const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
  254. friend bool operator==(const DefaultComposeStateTuple &x,
  255. const DefaultComposeStateTuple &y) {
  256. return (&x == &y) || (x.state_pair_ == y.state_pair_);
  257. }
  258. size_t Hash() const { return StateId1() + StateId2() * size_t{7853}; }
  259. private:
  260. std::pair<StateId, StateId> state_pair_;
  261. };
  // Hash functor for composition state tuples: the hash value is whatever the
  // tuple's own Hash() member returns.
  template <typename T>
  class ComposeHash {
   public:
    size_t operator()(const T &t) const {
      const size_t hash_value = t.Hash();
      return hash_value;
    }
  };
  268. // A HashStateTable over composition tuples.
  269. template <typename Arc, typename FilterState,
  270. typename StateTuple =
  271. DefaultComposeStateTuple<typename Arc::StateId, FilterState>,
  272. typename StateTable =
  273. CompactHashStateTable<StateTuple, ComposeHash<StateTuple>>>
  274. class GenericComposeStateTable : public StateTable {
  275. public:
  276. using StateId = typename Arc::StateId;
  277. GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
  278. GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
  279. size_t table_size)
  280. : StateTable(table_size) {}
  281. constexpr bool Error() const { return false; }
  282. private:
  283. GenericComposeStateTable &operator=(const GenericComposeStateTable &table) =
  284. delete;
  285. };
  286. // Fingerprint for general composition tuples.
  287. template <typename StateTuple>
  288. class ComposeFingerprint {
  289. public:
  290. using StateId = typename StateTuple::StateId;
  291. // Required but suboptimal constructor.
  292. ComposeFingerprint() : mult1_(8192), mult2_(8192) {
  293. LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
  294. }
  295. // Constructor is provided the sizes of the input FSTs.
  296. ComposeFingerprint(StateId nstates1, StateId nstates2)
  297. : mult1_(nstates1), mult2_(nstates1 * nstates2) {}
  298. size_t operator()(const StateTuple &tuple) const {
  299. return tuple.StateId1() + tuple.StateId2() * mult1_ +
  300. tuple.GetFilterState().Hash() * mult2_;
  301. }
  302. private:
  303. const ssize_t mult1_;
  304. const ssize_t mult2_;
  305. };
  // Useful when the first composition state determines the tuple: the
  // fingerprint is simply StateId1(). The call operator is const so the
  // functor can be invoked through a const object or const reference, matching
  // ComposeFingerprint and ComposeHash.
  template <typename StateTuple>
  class ComposeState1Fingerprint {
   public:
    size_t operator()(const StateTuple &tuple) const {
      return tuple.StateId1();
    }
  };
  // Useful when the second composition state determines the tuple: the
  // fingerprint is simply StateId2(). The call operator is const so the
  // functor can be invoked through a const object or const reference, matching
  // ComposeFingerprint and ComposeHash.
  template <typename StateTuple>
  class ComposeState2Fingerprint {
   public:
    size_t operator()(const StateTuple &tuple) const {
      return tuple.StateId2();
    }
  };
  318. // A VectorStateTable over composition tuples. This can be used when the
  319. // product of number of states in FST1 and FST2 (and the composition filter
  320. // state hash) is manageable. If the FSTs are not expanded FSTs, they will
  321. // first have their states counted.
  322. template <typename Arc, typename StateTuple>
  323. class ProductComposeStateTable
  324. : public VectorStateTable<StateTuple, ComposeFingerprint<StateTuple>> {
  325. public:
  326. using StateId = typename Arc::StateId;
  327. using StateTable =
  328. VectorStateTable<StateTuple, ComposeFingerprint<StateTuple>>;
  329. ProductComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
  330. size_t table_size = 0)
  331. : StateTable(ComposeFingerprint<StateTuple>(CountStates(fst1),
  332. CountStates(fst2)),
  333. table_size) {}
  334. ProductComposeStateTable(
  335. const ProductComposeStateTable<Arc, StateTuple> &table)
  336. : StateTable(ComposeFingerprint<StateTuple>(table.Fingerprint())) {}
  337. constexpr bool Error() const { return false; }
  338. private:
  339. ProductComposeStateTable &operator=(const ProductComposeStateTable &table) =
  340. delete;
  341. };
  342. // A vector-backed table over composition tuples which can be used when the
  343. // first FST is a string (i.e., satisfies kString property) and the second is
  344. // deterministic and epsilon-free. It should be used with a composition filter
  345. // that creates at most one filter state per tuple under these conditions (e.g.,
  346. // SequenceComposeFilter or MatchComposeFilter).
  347. template <typename Arc, typename StateTuple>
  348. class StringDetComposeStateTable
  349. : public VectorStateTable<StateTuple,
  350. ComposeState1Fingerprint<StateTuple>> {
  351. public:
  352. using StateId = typename Arc::StateId;
  353. using StateTable =
  354. VectorStateTable<StateTuple, ComposeState1Fingerprint<StateTuple>>;
  355. StringDetComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
  356. : error_(false) {
  357. static constexpr auto props2 = kIDeterministic | kNoIEpsilons;
  358. if (fst1.Properties(kString, true) != kString) {
  359. FSTERROR() << "StringDetComposeStateTable: 1st FST is not a string";
  360. error_ = true;
  361. } else if (fst2.Properties(props2, true) != props2) {
  362. FSTERROR() << "StringDetComposeStateTable: 2nd FST is not deterministic "
  363. "and epsilon-free";
  364. error_ = true;
  365. }
  366. }
  367. StringDetComposeStateTable(
  368. const StringDetComposeStateTable<Arc, StateTuple> &table)
  369. : StateTable(table), error_(table.error_) {}
  370. bool Error() const { return error_; }
  371. private:
  372. bool error_;
  373. StringDetComposeStateTable &operator=(const StringDetComposeStateTable &) =
  374. delete;
  375. };
  376. // A vector-backed table over composition tuples which can be used when the
  377. // first FST is deterministic and epsilon-free and the second is a string (i.e.,
  378. // satisfies kString). It should be used with a composition filter that creates
  379. // at most one filter state per tuple under these conditions (e.g.,
  380. // SequenceComposeFilter or MatchComposeFilter).
  381. template <typename Arc, typename StateTuple>
  382. class DetStringComposeStateTable
  383. : public VectorStateTable<StateTuple,
  384. ComposeState2Fingerprint<StateTuple>> {
  385. public:
  386. using StateId = typename Arc::StateId;
  387. using StateTable =
  388. VectorStateTable<StateTuple, ComposeState2Fingerprint<StateTuple>>;
  389. DetStringComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
  390. : error_(false) {
  391. static constexpr auto props = kODeterministic | kNoOEpsilons;
  392. if (fst1.Properties(props, true) != props) {
  393. FSTERROR() << "StringDetComposeStateTable: 1st FST is not "
  394. << "input-deterministic and epsilon-free";
  395. error_ = true;
  396. } else if (fst2.Properties(kString, true) != kString) {
  397. FSTERROR() << "DetStringComposeStateTable: 2nd FST is not a string";
  398. error_ = true;
  399. }
  400. }
  401. DetStringComposeStateTable(
  402. const DetStringComposeStateTable<Arc, StateTuple> &table)
  403. : StateTable(table), error_(table.error_) {}
  404. bool Error() const { return error_; }
  405. private:
  406. bool error_;
  407. DetStringComposeStateTable &operator=(const DetStringComposeStateTable &) =
  408. delete;
  409. };
  410. // An erasable table over composition tuples. The Erase(StateId) method can be
  411. // called if the user either is sure that composition will never return to that
  412. // tuple or doesn't care that if it does, it is assigned a new state ID.
  413. template <typename Arc, typename StateTuple>
  414. class ErasableComposeStateTable
  415. : public ErasableStateTable<StateTuple, ComposeHash<StateTuple>> {
  416. public:
  417. ErasableComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
  418. constexpr bool Error() const { return false; }
  419. private:
  420. ErasableComposeStateTable &operator=(const ErasableComposeStateTable &table) =
  421. delete;
  422. };
  423. } // namespace fst
  424. #endif // FST_STATE_TABLE_H_