You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1646 lines
57 KiB

  1. // Copyright 2005-2024 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the 'License');
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an 'AS IS' BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // See www.openfst.org for extensive documentation on this weighted
  16. // finite-state transducer library.
  17. //
  18. // FST Class for memory-efficient representation of common types of
  19. // FSTs: linear automata, acceptors, unweighted FSTs, ...
  20. #ifndef FST_COMPACT_FST_H_
  21. #define FST_COMPACT_FST_H_
  22. #include <sys/types.h>
  23. #include <climits>
  24. #include <cstddef>
  25. #include <cstdint>
  26. #include <cstdlib>
  27. #include <cstring>
  28. #include <ctime>
  29. #include <istream>
  30. #include <iterator>
  31. #include <memory>
  32. #include <ostream>
  33. #include <string>
  34. #include <tuple>
  35. #include <utility>
  36. #include <vector>
  37. #include <fst/log.h>
  38. #include <fst/arc.h>
  39. #include <fst/cache.h>
  40. #include <fst/expanded-fst.h>
  41. #include <fst/fst-decl.h> // For optional argument declarations
  42. #include <fst/fst.h>
  43. #include <fst/impl-to-fst.h>
  44. #include <fst/mapped-file.h>
  45. #include <fst/matcher.h>
  46. #include <fst/properties.h>
  47. #include <fst/test-properties.h>
  48. #include <fst/util.h>
  49. #include <string_view>
  50. namespace fst {
  51. struct CompactFstOptions : public CacheOptions {
  52. // The default caching behaviour is to do no caching. Most compactors are
  53. // cheap and therefore we save memory by not doing caching.
  54. CompactFstOptions() : CacheOptions(true, 0) {}
  55. explicit CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
  56. };
  57. // New (Fst) Compactor interface - used by CompactFst. This interface
  58. // allows complete flexibility in how the compaction is accomplished.
  59. //
  60. // class Compactor {
  61. // public:
  62. // // Constructor from the Fst to be compacted. If compactor is present,
  63. // // only optional state should be copied from it. Examples of this
  64. // // optional state include compression level or ArcCompactors.
  65. // explicit Compactor(const Fst<Arc> &fst,
  66. // shared_ptr<Compactor> compactor = nullptr);
  67. // // Copy constructor. Must make a thread-safe copy suitable for use by
  68. // // Fst::Copy(/*safe=*/true). Only thread-unsafe data structures
  69. // // need to be deeply copied. Ideally, this constructor is O(1) and any
  70. // // large structures are thread-safe and shared, while small ones may
  71. // // need to be copied.
  72. // Compactor(const Compactor &compactor);
  73. // // Default constructor (optional, see comment below).
  74. // Compactor();
  75. //
  76. // // Returns the start state, number of states, and total number of arcs
  77. // // of the compacted Fst
  78. // StateId Start() const;
  79. // StateId NumStates() const;
  80. // size_t NumArcs() const;
  81. //
  82. // // Accessor class for state attributes.
  83. // class State {
  84. // public:
  85. // State(); // Required, corresponds to kNoStateId.
  86. // // This constructor may, of course, also take a const Compactor *
  87. // // for the first argument. It is recommended to use const Compactor *
  88. // // if possible, but this can be Compactor * if necessary.
  89. // State(Compactor *c, StateId s); // Accessor for StateId 's'.
  90. // StateId GetStateId() const;
  91. // Weight Final() const;
  92. // size_t NumArcs() const;
  93. // // Gets the 'i'th arc for the state. Requires i < NumArcs().
  94. // // Flags are a bitmask of the kArc*Value flags that ArcIterator uses.
  95. // Arc GetArc(size_t i, uint8_t flags) const;
  96. // };
  97. //
  98. // // Modifies 'state' accessor to provide access to state id 's'.
  99. // void SetState(StateId s, State *state);
  100. //
  101. // // Tests whether 'fst' can be compacted by this compactor.
  102. // template <typename A>
  103. // bool IsCompatible(const Fst<A> &fst) const;
  104. //
  105. // // Returns the properties that are always true when an FST with the
  106. // // specified properties is compacted using this compactor.
  107. // // This function should clear bits for properties that no longer
  108. // // hold and set those for properties that are known to hold.
  109. // uint64_t Properties(uint64_t props) const;
  110. //
  111. // // Returns a string identifying the type of compactor.
  112. // static const std::string &Type();
  113. //
  114. // // Returns true if an error has occurred.
  115. // bool Error() const;
  116. //
  117. // // Writes a compactor to a file.
  118. // bool Write(std::ostream &strm, const FstWriteOptions &opts) const;
  119. //
  120. // // Reads a compactor from a file.
  121. // static Compactor *Read(std::istream &strm, const FstReadOptions &opts,
  122. // const FstHeader &hdr);
  123. // };
  124. //
  125. // Old ArcCompactor Interface:
  126. //
  127. // This interface is not deprecated; it, along with CompactArcStore and
  128. // other Stores that implement its interface, is simply more constrained
  129. // by essentially forcing the implementation to use an index array
  130. // and an arc array, but giving flexibility in how those are implemented.
  131. // This interface may still be useful and more convenient if that is the
  132. // desired representation.
  133. //
  134. // The ArcCompactor class determines how arcs and final weights are compacted
  135. // and expanded.
  136. //
  137. // Final weights are treated as transitions to the superfinal state, i.e.,
  138. // ilabel = olabel = kNoLabel and nextstate = kNoStateId.
  139. //
  140. // There are two types of compactors:
  141. //
  142. // * Fixed out-degree compactors: 'compactor.Size()' returns a positive integer
  143. // 's'. An FST can be compacted by this compactor only if each state has
  144. // exactly 's' outgoing transitions (counting a non-Zero() final weight as a
  145. // transition). A typical example is a compactor for string FSTs, i.e.,
  146. // 's == 1'.
  147. //
  148. // * Variable out-degree compactors: 'compactor.Size() == -1'. There are no
  149. // out-degree restrictions for these compactors.
  150. //
  151. // Interface:
  152. //
  153. // class ArcCompactor {
  154. // public:
  155. // // Default constructor (optional, see comment below).
  156. // ArcCompactor();
  157. //
  158. // // Copy constructor. Must make a thread-safe copy suitable for use by
  159. // // Fst::Copy(/*safe=*/true). Only thread-unsafe data structures
  160. // // need to be deeply copied.
  161. // ArcCompactor(const ArcCompactor &);
  162. //
  163. // // Element is the type of the compacted transitions.
  164. // using Element = ...
  165. //
  166. // // Returns the compacted representation of a transition 'arc'
  167. // // at a state 's'.
  168. // Element Compact(StateId s, const Arc &arc);
  169. //
  170. // // Returns the transition at state 's' represented by the compacted
  171. // // transition 'e'.
  172. // Arc Expand(StateId s, const Element &e) const;
  173. //
  174. // // Returns -1 for variable out-degree compactors, and the mandatory
  175. // // out-degree otherwise.
  176. // ssize_t Size() const;
  177. //
  178. // // Tests whether an FST can be compacted by this compactor.
  179. // bool Compatible(const Fst<A> &fst) const;
  180. //
  181. // // Returns the properties that are always true for an FST compacted using
  182. // // this compactor. Any Fst with the inverse of these properties should
  183. // // be incompatible.
  184. // uint64_t Properties() const;
  185. //
  186. // // Returns a string identifying the type of compactor.
  187. // static const std::string &Type();
  188. //
  189. // // Writes a compactor to a file.
  190. // bool Write(std::ostream &strm) const;
  191. //
  192. // // Reads a compactor from a file.
  193. // static ArcCompactor *Read(std::istream &strm);
  194. // };
  195. //
  196. // The default constructor is only required for FST_REGISTER to work (i.e.,
  197. // enabling Convert() and the command-line utilities to work with this new
  198. // compactor). However, a default constructor always needs to be specified for
  199. // this code to compile, but one can have it simply raise an error when called,
  200. // like so:
  201. //
  202. // Compactor::Compactor() {
  203. // FSTERROR() << "Compactor: No default constructor";
  204. // }
  205. // Default implementation data for CompactArcCompactor. Only old-style
  206. // ArcCompactors are supported because the CompactArcStore constructors
  207. // use the old API.
  208. //
  209. // CompactArcStore is thread-compatible, but not thread-safe.
  210. // The copy constructor makes a thread-safe copy.
  211. //
  212. // The implementation contains two arrays: 'states_' and 'compacts_'.
  213. //
  214. // For fixed out-degree compactors, the 'states_' array is unallocated. The
  215. // 'compacts_' array contains the compacted transitions. Its size is
  216. // 'ncompacts_'. The outgoing transitions at a given state are stored
  217. // consecutively. For a given state 's', its 'compactor.Size()' outgoing
  218. // transitions (including a superfinal transition when 's' is final), are stored
  219. // in positions ['s*compactor.Size()', '(s+1)*compactor.Size()').
  220. //
  221. // For variable out-degree compactors, the states_ array has size
  222. // 'nstates_ + 1' and contains positions in the 'compacts_' array. For a
  223. // given state 's', the compacted transitions of 's' are stored in positions
  224. // ['states_[s]', 'states_[s + 1]') in 'compacts_'. By convention,
  225. // 'states_[nstates_] == ncompacts_'.
  226. //
  227. // In both cases, the superfinal transitions (when 's' is final, i.e.,
  228. // 'Final(s) != Weight::Zero()') are stored first.
  229. //
  230. // The unsigned type 'Unsigned' is used to represent indices into the 'compacts_' array.
  231. template <class Element, class Unsigned>
  232. class CompactArcStore {
  233. public:
  234. CompactArcStore() = default;
  235. // Makes a thread-safe copy. O(1).
  236. CompactArcStore(const CompactArcStore &) = default;
  237. template <class Arc, class ArcCompactor>
  238. CompactArcStore(const Fst<Arc> &fst, const ArcCompactor &arc_compactor);
  239. template <class Iterator, class ArcCompactor>
  240. CompactArcStore(const Iterator begin, const Iterator end,
  241. const ArcCompactor &arc_compactor);
  242. ~CompactArcStore() = default;
  243. template <class ArcCompactor>
  244. static CompactArcStore *Read(std::istream &strm, const FstReadOptions &opts,
  245. const FstHeader &hdr,
  246. const ArcCompactor &arc_compactor);
  247. bool Write(std::ostream &strm, const FstWriteOptions &opts) const;
  248. // Returns the starting index in 'compacts_' of the transitions
  249. // for state 'i'. See class-level comment for further details.
  250. // Requires that the CompactArcStore was constructed with a
  251. // variable out-degree compactor. Requires 0 <= i <= NumStates().
  252. // By convention, States(NumStates()) == NumCompacts().
  253. Unsigned States(ssize_t i) const { return states_[i]; }
  254. // Returns the compacted Element at position i. See class-level comment
  255. // for further details. Requires 0 <= i < NumCompacts().
  256. const Element &Compacts(size_t i) const { return compacts_[i]; }
  257. size_t NumStates() const { return nstates_; }
  258. size_t NumCompacts() const { return ncompacts_; }
  259. size_t NumArcs() const { return narcs_; }
  260. ssize_t Start() const { return start_; }
  261. bool Error() const { return error_; }
  262. // Returns a string identifying the type of data storage container.
  263. static const std::string &Type();
  264. private:
  265. std::shared_ptr<MappedFile> states_region_;
  266. std::shared_ptr<MappedFile> compacts_region_;
  267. // Unowned pointer into states_region_.
  268. Unsigned *states_ = nullptr;
  269. // Unowned pointer into compacts_region_.
  270. Element *compacts_ = nullptr;
  271. size_t nstates_ = 0;
  272. size_t ncompacts_ = 0;
  273. size_t narcs_ = 0;
  274. ssize_t start_ = kNoStateId;
  275. bool error_ = false;
  276. };
  277. template <class Element, class Unsigned>
  278. template <class Arc, class ArcCompactor>
  279. CompactArcStore<Element, Unsigned>::CompactArcStore(
  280. const Fst<Arc> &fst, const ArcCompactor &arc_compactor) {
  281. using StateId = typename Arc::StateId;
  282. using Weight = typename Arc::Weight;
  283. start_ = fst.Start();
  284. // Counts # of states and arcs.
  285. StateId nfinals = 0;
  286. for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
  287. ++nstates_;
  288. const auto s = siter.Value();
  289. narcs_ += fst.NumArcs(s);
  290. if (fst.Final(s) != Weight::Zero()) ++nfinals;
  291. }
  292. if (arc_compactor.Size() == -1) {
  293. states_region_ = fst::WrapUnique(MappedFile::Allocate(
  294. sizeof(states_[0]) * (nstates_ + 1), alignof(decltype(states_[0]))));
  295. states_ = static_cast<Unsigned *>(states_region_->mutable_data());
  296. ncompacts_ = narcs_ + nfinals;
  297. compacts_region_ = fst::WrapUnique(MappedFile::Allocate(
  298. sizeof(compacts_[0]) * ncompacts_, alignof(decltype(compacts_[0]))));
  299. compacts_ = static_cast<Element *>(compacts_region_->mutable_data());
  300. states_[nstates_] = ncompacts_;
  301. } else {
  302. states_ = nullptr;
  303. ncompacts_ = nstates_ * arc_compactor.Size();
  304. if ((narcs_ + nfinals) != ncompacts_) {
  305. FSTERROR() << "CompactArcStore: ArcCompactor incompatible with FST";
  306. error_ = true;
  307. return;
  308. }
  309. compacts_region_ = fst::WrapUnique(MappedFile::Allocate(
  310. sizeof(compacts_[0]) * ncompacts_, alignof(decltype(compacts_[0]))));
  311. compacts_ = static_cast<Element *>(compacts_region_->mutable_data());
  312. }
  313. size_t pos = 0;
  314. size_t fpos = 0;
  315. for (size_t s = 0; s < nstates_; ++s) {
  316. fpos = pos;
  317. if (arc_compactor.Size() == -1) states_[s] = pos;
  318. if (fst.Final(s) != Weight::Zero()) {
  319. compacts_[pos++] = arc_compactor.Compact(
  320. s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
  321. }
  322. for (ArcIterator<Fst<Arc>> aiter(fst, s); !aiter.Done(); aiter.Next()) {
  323. compacts_[pos++] = arc_compactor.Compact(s, aiter.Value());
  324. }
  325. if ((arc_compactor.Size() != -1) && (pos != fpos + arc_compactor.Size())) {
  326. FSTERROR() << "CompactArcStore: ArcCompactor incompatible with FST";
  327. error_ = true;
  328. return;
  329. }
  330. }
  331. if (pos != ncompacts_) {
  332. FSTERROR() << "CompactArcStore: ArcCompactor incompatible with FST";
  333. error_ = true;
  334. return;
  335. }
  336. }
  337. template <class Element, class Unsigned>
  338. template <class Iterator, class ArcCompactor>
  339. CompactArcStore<Element, Unsigned>::CompactArcStore(
  340. const Iterator begin, const Iterator end,
  341. const ArcCompactor &arc_compactor) {
  342. using Arc = typename ArcCompactor::Arc;
  343. using Weight = typename Arc::Weight;
  344. if (arc_compactor.Size() != -1) {
  345. ncompacts_ = std::distance(begin, end);
  346. if (arc_compactor.Size() == 1) {
  347. // For strings, allows implicit final weight. Empty input is the empty
  348. // string.
  349. if (ncompacts_ == 0) {
  350. ++ncompacts_;
  351. } else {
  352. const auto arc =
  353. arc_compactor.Expand(ncompacts_ - 1, *(begin + (ncompacts_ - 1)));
  354. if (arc.ilabel != kNoLabel) ++ncompacts_;
  355. }
  356. }
  357. if (ncompacts_ % arc_compactor.Size()) {
  358. FSTERROR() << "CompactArcStore: Size of input container incompatible"
  359. << " with arc compactor";
  360. error_ = true;
  361. return;
  362. }
  363. if (ncompacts_ == 0) return;
  364. start_ = 0;
  365. nstates_ = ncompacts_ / arc_compactor.Size();
  366. compacts_region_ = fst::WrapUnique(MappedFile::Allocate(
  367. sizeof(compacts_[0]) * ncompacts_, alignof(decltype(compacts_[0]))));
  368. compacts_ = static_cast<Element *>(compacts_region_->mutable_data());
  369. size_t i = 0;
  370. Iterator it = begin;
  371. for (; it != end; ++it, ++i) {
  372. compacts_[i] = *it;
  373. if (arc_compactor.Expand(i, *it).ilabel != kNoLabel) ++narcs_;
  374. }
  375. if (i < ncompacts_) {
  376. compacts_[i] = arc_compactor.Compact(
  377. i, Arc(kNoLabel, kNoLabel, Weight::One(), kNoStateId));
  378. }
  379. } else {
  380. if (std::distance(begin, end) == 0) return;
  381. // Count # of states, arcs and compacts.
  382. auto it = begin;
  383. for (size_t i = 0; it != end; ++it, ++i) {
  384. const auto arc = arc_compactor.Expand(i, *it);
  385. if (arc.ilabel != kNoLabel) {
  386. ++narcs_;
  387. ++ncompacts_;
  388. } else {
  389. ++nstates_;
  390. if (arc.weight != Weight::Zero()) ++ncompacts_;
  391. }
  392. }
  393. start_ = 0;
  394. compacts_region_ = fst::WrapUnique(MappedFile::Allocate(
  395. sizeof(compacts_[0]) * ncompacts_, alignof(decltype(compacts_[0]))));
  396. compacts_ = static_cast<Element *>(compacts_region_->mutable_data());
  397. states_region_ = fst::WrapUnique(MappedFile::Allocate(
  398. sizeof(states_[0]) * (nstates_ + 1), alignof(decltype(states_[0]))));
  399. states_ = static_cast<Unsigned *>(states_region_->mutable_data());
  400. states_[nstates_] = ncompacts_;
  401. size_t i = 0;
  402. size_t s = 0;
  403. for (it = begin; it != end; ++it) {
  404. const auto arc = arc_compactor.Expand(i, *it);
  405. if (arc.ilabel != kNoLabel) {
  406. compacts_[i++] = *it;
  407. } else {
  408. states_[s++] = i;
  409. if (arc.weight != Weight::Zero()) compacts_[i++] = *it;
  410. }
  411. }
  412. if ((s != nstates_) || (i != ncompacts_)) {
  413. FSTERROR() << "CompactArcStore: Ill-formed input container";
  414. error_ = true;
  415. return;
  416. }
  417. }
  418. }
  419. template <class Element, class Unsigned>
  420. template <class ArcCompactor>
  421. CompactArcStore<Element, Unsigned> *CompactArcStore<Element, Unsigned>::Read(
  422. std::istream &strm, const FstReadOptions &opts, const FstHeader &hdr,
  423. const ArcCompactor &arc_compactor) {
  424. auto data = std::make_unique<CompactArcStore>();
  425. data->start_ = hdr.Start();
  426. data->nstates_ = hdr.NumStates();
  427. data->narcs_ = hdr.NumArcs();
  428. if (arc_compactor.Size() == -1) {
  429. if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
  430. LOG(ERROR) << "CompactArcStore::Read: Alignment failed: " << opts.source;
  431. return nullptr;
  432. }
  433. auto b = (data->nstates_ + 1) * sizeof(Unsigned);
  434. data->states_region_.reset(MappedFile::Map(
  435. strm, opts.mode == FstReadOptions::MAP, opts.source, b));
  436. if (!strm || !data->states_region_) {
  437. LOG(ERROR) << "CompactArcStore::Read: Read failed: " << opts.source;
  438. return nullptr;
  439. }
  440. data->states_ =
  441. static_cast<Unsigned *>(data->states_region_->mutable_data());
  442. } else {
  443. data->states_ = nullptr;
  444. }
  445. data->ncompacts_ = arc_compactor.Size() == -1
  446. ? data->states_[data->nstates_]
  447. : data->nstates_ * arc_compactor.Size();
  448. if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
  449. LOG(ERROR) << "CompactArcStore::Read: Alignment failed: " << opts.source;
  450. return nullptr;
  451. }
  452. size_t b = data->ncompacts_ * sizeof(Element);
  453. data->compacts_region_.reset(
  454. MappedFile::Map(strm, opts.mode == FstReadOptions::MAP, opts.source, b));
  455. if (!strm || !data->compacts_region_) {
  456. LOG(ERROR) << "CompactArcStore::Read: Read failed: " << opts.source;
  457. return nullptr;
  458. }
  459. data->compacts_ =
  460. static_cast<Element *>(data->compacts_region_->mutable_data());
  461. return data.release();
  462. }
  463. template <class Element, class Unsigned>
  464. bool CompactArcStore<Element, Unsigned>::Write(
  465. std::ostream &strm, const FstWriteOptions &opts) const {
  466. if (states_) {
  467. if (opts.align && !AlignOutput(strm)) {
  468. LOG(ERROR) << "CompactArcStore::Write: Alignment failed: " << opts.source;
  469. return false;
  470. }
  471. strm.write(reinterpret_cast<const char *>(states_),
  472. (nstates_ + 1) * sizeof(Unsigned));
  473. }
  474. if (opts.align && !AlignOutput(strm)) {
  475. LOG(ERROR) << "CompactArcStore::Write: Alignment failed: " << opts.source;
  476. return false;
  477. }
  478. strm.write(reinterpret_cast<const char *>(compacts_),
  479. ncompacts_ * sizeof(Element));
  480. strm.flush();
  481. if (!strm) {
  482. LOG(ERROR) << "CompactArcStore::Write: Write failed: " << opts.source;
  483. return false;
  484. }
  485. return true;
  486. }
  487. template <class Element, class Unsigned>
  488. const std::string &CompactArcStore<Element, Unsigned>::Type() {
  489. static const std::string *const type = new std::string("compact");
  490. return *type;
  491. }
  492. template <class C, class U, class S>
  493. class CompactArcState;
  494. // Wraps an old-style arc compactor and a compact store as a new Fst compactor.
  495. // The copy constructors of AC and S must make thread-safe copies and should
  496. // be O(1).
  497. template <class AC, class U,
  498. class S /*= CompactArcStore<typename AC::Element, U>*/>
  499. class CompactArcCompactor {
  500. public:
  501. using ArcCompactor = AC;
  502. using Unsigned = U;
  503. using CompactStore = S;
  504. using Element = typename AC::Element;
  505. using Arc = typename AC::Arc;
  506. using StateId = typename Arc::StateId;
  507. using Weight = typename Arc::Weight;
  508. using State = CompactArcState<AC, U, S>;
  509. friend State;
  510. CompactArcCompactor() : arc_compactor_(nullptr), compact_store_(nullptr) {}
  511. // Constructs from Fst.
  512. explicit CompactArcCompactor(const Fst<Arc> &fst,
  513. ArcCompactor &&arc_compactor = ArcCompactor())
  514. : CompactArcCompactor(
  515. fst, std::make_shared<ArcCompactor>(std::move(arc_compactor))) {}
  516. CompactArcCompactor(const Fst<Arc> &fst,
  517. std::shared_ptr<ArcCompactor> arc_compactor)
  518. : arc_compactor_(std::move(arc_compactor)),
  519. compact_store_(std::make_shared<S>(fst, *arc_compactor_)) {}
  520. CompactArcCompactor(const Fst<Arc> &fst,
  521. std::shared_ptr<CompactArcCompactor> compactor)
  522. : arc_compactor_(compactor->arc_compactor_),
  523. compact_store_(compactor->compact_store_ == nullptr
  524. ? std::make_shared<S>(fst, *arc_compactor_)
  525. : compactor->compact_store_) {}
  526. // Constructs from CompactStore.
  527. CompactArcCompactor(std::shared_ptr<ArcCompactor> arc_compactor,
  528. std::shared_ptr<CompactStore> compact_store)
  529. : arc_compactor_(std::move(arc_compactor)),
  530. compact_store_(std::move(compact_store)) {}
  531. // The following 2 constructors take as input two iterators delimiting a set
  532. // of (already) compacted transitions, starting with the transitions out of
  533. // the initial state. The format of the input differs for fixed out-degree
  534. // and variable out-degree arc compactors.
  535. //
  536. // - For fixed out-degree arc compactors, the final weight (encoded as a
  537. // compacted transition) needs to be given only for final states. All strings
  538. // (arc compactor of size 1) will be assume to be terminated by a final state
  539. // even when the final state is not implicitely given.
  540. //
  541. // - For variable out-degree arc compactors, the final weight (encoded as a
  542. // compacted transition) needs to be given for all states and must appeared
  543. // first in the list (for state s, final weight of s, followed by outgoing
  544. // transitons in s).
  545. //
  546. // These 2 constructors allows the direct construction of a CompactArcFst
  547. // without first creating a more memory-hungry regular FST. This is useful
  548. // when memory usage is severely constrained.
  549. //
  550. // Usage:
  551. // CompactArcFst<...> fst(
  552. // std::make_shared<CompactArcFst<...>::Compactor>(b, e));
  553. template <class Iterator>
  554. CompactArcCompactor(const Iterator b, const Iterator e,
  555. std::shared_ptr<ArcCompactor> arc_compactor)
  556. : arc_compactor_(std::move(arc_compactor)),
  557. compact_store_(std::make_shared<S>(b, e, *arc_compactor_)) {}
  558. template <class Iterator>
  559. CompactArcCompactor(const Iterator b, const Iterator e)
  560. : CompactArcCompactor(b, e, std::make_shared<ArcCompactor>()) {}
  561. // Copy constructor. This makes a thread-safe copy, so requires that
  562. // The ArcCompactor and CompactStore copy constructors make thread-safe
  563. // copies.
  564. CompactArcCompactor(const CompactArcCompactor &compactor)
  565. : arc_compactor_(
  566. compactor.GetArcCompactor() == nullptr
  567. ? nullptr
  568. : std::make_shared<ArcCompactor>(*compactor.GetArcCompactor())),
  569. compact_store_(compactor.GetCompactStore() == nullptr
  570. ? nullptr
  571. : std::make_shared<CompactStore>(
  572. *compactor.GetCompactStore())) {}
  573. template <class OtherC>
  574. explicit CompactArcCompactor(
  575. const CompactArcCompactor<OtherC, U, S> &compactor)
  576. : arc_compactor_(
  577. compactor.GetArcCompactor() == nullptr
  578. ? nullptr
  579. : std::make_shared<ArcCompactor>(*compactor.GetArcCompactor())),
  580. compact_store_(compactor.GetCompactStore() == nullptr
  581. ? nullptr
  582. : std::make_shared<CompactStore>(
  583. *compactor.GetCompactStore())) {}
  584. StateId Start() const { return compact_store_->Start(); }
  585. StateId NumStates() const { return compact_store_->NumStates(); }
  586. size_t NumArcs() const { return compact_store_->NumArcs(); }
  587. void SetState(StateId s, State *state) const {
  588. if (state->GetStateId() != s) state->Set(this, s);
  589. }
  590. static CompactArcCompactor *Read(std::istream &strm,
  591. const FstReadOptions &opts,
  592. const FstHeader &hdr) {
  593. std::shared_ptr<ArcCompactor> arc_compactor(ArcCompactor::Read(strm));
  594. if (arc_compactor == nullptr) return nullptr;
  595. std::shared_ptr<S> compact_store(S::Read(strm, opts, hdr, *arc_compactor));
  596. if (compact_store == nullptr) return nullptr;
  597. return new CompactArcCompactor(arc_compactor, compact_store);
  598. }
  599. bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
  600. return arc_compactor_->Write(strm) && compact_store_->Write(strm, opts);
  601. }
  602. uint64_t Properties(uint64_t props) const {
  603. // ArcCompactor properties can just be or-ed in since it is assumed that
  604. // if the ArcCompactor sets a property, any FST with the inverse
  605. // property is incompatible.
  606. return arc_compactor_->Properties() | props;
  607. }
  608. bool IsCompatible(const Fst<Arc> &fst) const {
  609. return arc_compactor_->Compatible(fst);
  610. }
  611. bool Error() const { return compact_store_->Error(); }
  612. bool HasFixedOutdegree() const { return arc_compactor_->Size() != -1; }
  613. static const std::string &Type() {
  614. static const std::string *const type = [] {
  615. std::string type = "compact";
  616. if (sizeof(U) != sizeof(uint32_t)) type += std::to_string(8 * sizeof(U));
  617. type += "_";
  618. type += ArcCompactor::Type();
  619. if (CompactStore::Type() != "compact") {
  620. type += "_";
  621. type += CompactStore::Type();
  622. }
  623. return new std::string(type);
  624. }();
  625. return *type;
  626. }
  627. const ArcCompactor *GetArcCompactor() const { return arc_compactor_.get(); }
  628. const CompactStore *GetCompactStore() const { return compact_store_.get(); }
  629. ArcCompactor *MutableArcCompactor() { return arc_compactor_.get(); }
  630. CompactStore *MutableCompactStore() { return compact_store_.get(); }
  631. std::shared_ptr<ArcCompactor> SharedArcCompactor() { return arc_compactor_; }
  632. std::shared_ptr<CompactStore> SharedCompactStore() { return compact_store_; }
  633. // TODO(allauzen): remove dependencies on this method and make private.
  634. Arc ComputeArc(StateId s, Unsigned i, uint8_t flags) const {
  635. return arc_compactor_->Expand(s, compact_store_->Compacts(i), flags);
  636. }
  637. private:
  638. std::pair<Unsigned, Unsigned> CompactsRange(StateId s) const {
  639. std::pair<size_t, size_t> range;
  640. if (HasFixedOutdegree()) {
  641. range.first = s * arc_compactor_->Size();
  642. range.second = arc_compactor_->Size();
  643. } else {
  644. range.first = compact_store_->States(s);
  645. range.second = compact_store_->States(s + 1) - range.first;
  646. }
  647. return range;
  648. }
  649. private:
  650. std::shared_ptr<ArcCompactor> arc_compactor_;
  651. std::shared_ptr<CompactStore> compact_store_;
  652. };
  653. // Default implementation of state attributes accessor class for
  654. // CompactArcCompactor. Use of efficient specialization strongly encouraged.
  655. template <class ArcCompactor, class U, class S>
  656. class CompactArcState {
  657. public:
  658. using Arc = typename ArcCompactor::Arc;
  659. using StateId = typename Arc::StateId;
  660. using Weight = typename Arc::Weight;
  661. using Compactor = CompactArcCompactor<ArcCompactor, U, S>;
  662. CompactArcState() = default;
  663. CompactArcState(const Compactor *compactor, StateId s)
  664. : compactor_(compactor),
  665. s_(s),
  666. range_(compactor->CompactsRange(s)),
  667. has_final_(
  668. range_.second != 0 &&
  669. compactor->ComputeArc(s, range_.first, kArcILabelValue).ilabel ==
  670. kNoLabel) {
  671. if (has_final_) {
  672. ++range_.first;
  673. --range_.second;
  674. }
  675. }
  676. void Set(const Compactor *compactor, StateId s) {
  677. compactor_ = compactor;
  678. s_ = s;
  679. range_ = compactor->CompactsRange(s);
  680. if (range_.second != 0 &&
  681. compactor->ComputeArc(s, range_.first, kArcILabelValue).ilabel ==
  682. kNoLabel) {
  683. has_final_ = true;
  684. ++range_.first;
  685. --range_.second;
  686. } else {
  687. has_final_ = false;
  688. }
  689. }
  690. StateId GetStateId() const { return s_; }
  691. Weight Final() const {
  692. if (!has_final_) return Weight::Zero();
  693. return compactor_->ComputeArc(s_, range_.first - 1, kArcWeightValue).weight;
  694. }
  695. size_t NumArcs() const { return range_.second; }
  696. Arc GetArc(size_t i, uint8_t flags) const {
  697. return compactor_->ComputeArc(s_, range_.first + i, flags);
  698. }
  699. private:
  700. const Compactor *compactor_ = nullptr; // borrowed ref.
  701. StateId s_ = kNoStateId;
  702. std::pair<U, U> range_ = {0, 0};
  703. bool has_final_ = false;
  704. };
  705. // Specialization for CompactArcStore.
  706. template <class ArcCompactor, class U>
  707. class CompactArcState<ArcCompactor, U,
  708. CompactArcStore<typename ArcCompactor::Element, U>> {
  709. public:
  710. using Arc = typename ArcCompactor::Arc;
  711. using StateId = typename Arc::StateId;
  712. using Weight = typename Arc::Weight;
  713. using CompactStore = CompactArcStore<typename ArcCompactor::Element, U>;
  714. using Compactor = CompactArcCompactor<ArcCompactor, U, CompactStore>;
  715. CompactArcState() = default;
  716. CompactArcState(const Compactor *compactor, StateId s)
  717. : arc_compactor_(compactor->GetArcCompactor()), s_(s) {
  718. Init(compactor);
  719. }
  720. void Set(const Compactor *compactor, StateId s) {
  721. arc_compactor_ = compactor->GetArcCompactor();
  722. s_ = s;
  723. has_final_ = false;
  724. Init(compactor);
  725. }
  726. StateId GetStateId() const { return s_; }
  727. Weight Final() const {
  728. if (!has_final_) return Weight::Zero();
  729. return arc_compactor_->Expand(s_, *(compacts_ - 1), kArcWeightValue).weight;
  730. }
  731. size_t NumArcs() const { return num_arcs_; }
  732. Arc GetArc(size_t i, uint8_t flags) const {
  733. return arc_compactor_->Expand(s_, compacts_[i], flags);
  734. }
  735. private:
  736. void Init(const Compactor *compactor) {
  737. const auto *store = compactor->GetCompactStore();
  738. U offset;
  739. if (!compactor->HasFixedOutdegree()) { // Variable out-degree compactor.
  740. offset = store->States(s_);
  741. num_arcs_ = store->States(s_ + 1) - offset;
  742. } else { // Fixed out-degree compactor.
  743. offset = s_ * arc_compactor_->Size();
  744. num_arcs_ = arc_compactor_->Size();
  745. }
  746. if (num_arcs_ > 0) {
  747. compacts_ = &(store->Compacts(offset));
  748. if (arc_compactor_->Expand(s_, *compacts_, kArcILabelValue).ilabel ==
  749. kNoStateId) {
  750. ++compacts_;
  751. --num_arcs_;
  752. has_final_ = true;
  753. }
  754. }
  755. }
  756. private:
  757. const ArcCompactor *arc_compactor_ = nullptr; // Borrowed reference.
  758. const typename ArcCompactor::Element *compacts_ =
  759. nullptr; // Borrowed reference.
  760. StateId s_ = kNoStateId;
  761. U num_arcs_ = 0;
  762. bool has_final_ = false;
  763. };
  764. template <class F, class G>
  765. void Cast(const F &, G *);
  766. template <class CompactArcFST, class FST>
  767. bool WriteCompactArcFst(
  768. const FST &fst,
  769. const typename CompactArcFST::Compactor::ArcCompactor &arc_compactor,
  770. std::ostream &strm, const FstWriteOptions &opts);
  771. namespace internal {
  772. // Implementation class for CompactFst, which contains parametrizeable
  773. // Fst data storage (CompactArcStore by default) and Fst cache.
  774. // C's copy constructor must make a thread-safe copy.
  775. template <class Arc, class C, class CacheStore = DefaultCacheStore<Arc>>
  776. class CompactFstImpl
  777. : public CacheBaseImpl<typename CacheStore::State, CacheStore> {
  778. public:
  779. using Weight = typename Arc::Weight;
  780. using StateId = typename Arc::StateId;
  781. using Compactor = C;
  782. using FstImpl<Arc>::SetType;
  783. using FstImpl<Arc>::SetProperties;
  784. using FstImpl<Arc>::Properties;
  785. using FstImpl<Arc>::SetInputSymbols;
  786. using FstImpl<Arc>::SetOutputSymbols;
  787. using FstImpl<Arc>::WriteHeader;
  788. using ImplBase = CacheBaseImpl<typename CacheStore::State, CacheStore>;
  789. using ImplBase::HasArcs;
  790. using ImplBase::HasFinal;
  791. using ImplBase::HasStart;
  792. using ImplBase::PushArc;
  793. using ImplBase::SetArcs;
  794. using ImplBase::SetFinal;
  795. using ImplBase::SetStart;
  796. CompactFstImpl()
  797. : ImplBase(CompactFstOptions()),
  798. compactor_(std::make_shared<Compactor>()) {
  799. SetType(Compactor::Type());
  800. SetProperties(kNullProperties | kStaticProperties);
  801. }
  802. // Constructs a CompactFstImpl, creating a new Compactor using
  803. // Compactor(fst, compactor); this uses the compactor arg only for optional
  804. // information, such as compression level. See the Compactor interface
  805. // description.
  806. CompactFstImpl(const Fst<Arc> &fst, std::shared_ptr<Compactor> compactor,
  807. const CompactFstOptions &opts)
  808. : ImplBase(opts),
  809. compactor_(std::make_shared<Compactor>(fst, std::move(compactor))) {
  810. SetType(Compactor::Type());
  811. SetInputSymbols(fst.InputSymbols());
  812. SetOutputSymbols(fst.OutputSymbols());
  813. if (compactor_->Error()) SetProperties(kError, kError);
  814. uint64_t copy_properties =
  815. fst.Properties(kMutable, false)
  816. ? fst.Properties(kCopyProperties, true)
  817. : CheckProperties(
  818. fst, kCopyProperties & ~kWeightedCycles & ~kUnweightedCycles,
  819. kCopyProperties);
  820. if ((copy_properties & kError) || !compactor_->IsCompatible(fst)) {
  821. FSTERROR() << "CompactFstImpl: Input Fst incompatible with compactor";
  822. SetProperties(kError, kError);
  823. return;
  824. }
  825. SetProperties(compactor_->Properties(copy_properties) | kStaticProperties);
  826. }
  827. CompactFstImpl(std::shared_ptr<Compactor> compactor,
  828. const CompactFstOptions &opts)
  829. : ImplBase(opts), compactor_(std::move(compactor)) {
  830. SetType(Compactor::Type());
  831. SetProperties(kStaticProperties | compactor_->Properties(0));
  832. if (compactor_->Error()) SetProperties(kError, kError);
  833. }
  834. // Makes a thread-safe copy; requires that Compactor's copy constructor
  835. // does so as well.
  836. CompactFstImpl(const CompactFstImpl &impl)
  837. : ImplBase(impl),
  838. compactor_(impl.compactor_ == nullptr
  839. ? std::make_shared<Compactor>()
  840. : std::make_shared<Compactor>(*impl.compactor_)) {
  841. SetType(impl.Type());
  842. SetProperties(impl.Properties());
  843. SetInputSymbols(impl.InputSymbols());
  844. SetOutputSymbols(impl.OutputSymbols());
  845. }
  846. // Allows to change the cache store from OtherCacheStore to CacheStore.
  847. template <class OtherCacheStore>
  848. explicit CompactFstImpl(
  849. const CompactFstImpl<Arc, Compactor, OtherCacheStore> &impl)
  850. : ImplBase(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
  851. compactor_(impl.compactor_ == nullptr
  852. ? std::make_shared<Compactor>()
  853. : std::make_shared<Compactor>(*impl.compactor_)) {
  854. SetType(impl.Type());
  855. SetProperties(impl.Properties());
  856. SetInputSymbols(impl.InputSymbols());
  857. SetOutputSymbols(impl.OutputSymbols());
  858. }
  859. StateId Start() {
  860. if (!HasStart()) SetStart(compactor_->Start());
  861. return ImplBase::Start();
  862. }
  863. Weight Final(StateId s) {
  864. if (HasFinal(s)) return ImplBase::Final(s);
  865. compactor_->SetState(s, &state_);
  866. return state_.Final();
  867. }
  868. StateId NumStates() const {
  869. if (Properties(kError)) return 0;
  870. return compactor_->NumStates();
  871. }
  872. size_t NumArcs(StateId s) {
  873. if (HasArcs(s)) return ImplBase::NumArcs(s);
  874. compactor_->SetState(s, &state_);
  875. return state_.NumArcs();
  876. }
  877. size_t NumInputEpsilons(StateId s) {
  878. if (!HasArcs(s) && !Properties(kILabelSorted)) Expand(s);
  879. if (HasArcs(s)) return ImplBase::NumInputEpsilons(s);
  880. return CountEpsilons(s, false);
  881. }
  882. size_t NumOutputEpsilons(StateId s) {
  883. if (!HasArcs(s) && !Properties(kOLabelSorted)) Expand(s);
  884. if (HasArcs(s)) return ImplBase::NumOutputEpsilons(s);
  885. return CountEpsilons(s, true);
  886. }
  887. size_t CountEpsilons(StateId s, bool output_epsilons) {
  888. compactor_->SetState(s, &state_);
  889. const uint8_t flags = output_epsilons ? kArcOLabelValue : kArcILabelValue;
  890. size_t num_eps = 0;
  891. const size_t num_arcs = state_.NumArcs();
  892. for (size_t i = 0; i < num_arcs; ++i) {
  893. const auto &arc = state_.GetArc(i, flags);
  894. const auto label = output_epsilons ? arc.olabel : arc.ilabel;
  895. if (label == 0) {
  896. ++num_eps;
  897. } else if (label > 0) {
  898. break;
  899. }
  900. }
  901. return num_eps;
  902. }
  903. static CompactFstImpl *Read(std::istream &strm, const FstReadOptions &opts) {
  904. auto impl = std::make_unique<CompactFstImpl>();
  905. FstHeader hdr;
  906. if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
  907. return nullptr;
  908. }
  909. // Ensures compatibility.
  910. if (hdr.Version() == kAlignedFileVersion) {
  911. hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
  912. }
  913. impl->compactor_ =
  914. std::shared_ptr<Compactor>(Compactor::Read(strm, opts, hdr));
  915. if (!impl->compactor_) {
  916. return nullptr;
  917. }
  918. return impl.release();
  919. }
  920. bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
  921. FstHeader hdr;
  922. hdr.SetStart(compactor_->Start());
  923. hdr.SetNumStates(compactor_->NumStates());
  924. hdr.SetNumArcs(compactor_->NumArcs());
  925. // Ensures compatibility.
  926. const auto file_version = opts.align ? kAlignedFileVersion : kFileVersion;
  927. WriteHeader(strm, opts, file_version, &hdr);
  928. return compactor_->Write(strm, opts);
  929. }
  930. // Provides information needed for generic state iterator.
  931. void InitStateIterator(StateIteratorData<Arc> *data) const {
  932. data->base = nullptr;
  933. data->nstates = compactor_->NumStates();
  934. }
  935. void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) {
  936. if (!HasArcs(s)) Expand(s);
  937. ImplBase::InitArcIterator(s, data);
  938. }
  939. void Expand(StateId s) {
  940. compactor_->SetState(s, &state_);
  941. const size_t num_arcs = state_.NumArcs();
  942. for (size_t i = 0; i < num_arcs; ++i)
  943. PushArc(s, state_.GetArc(i, kArcValueFlags));
  944. SetArcs(s);
  945. if (!HasFinal(s)) SetFinal(s, state_.Final());
  946. }
  947. const Compactor *GetCompactor() const { return compactor_.get(); }
  948. Compactor *MutableCompactor() { return compactor_.get(); }
  949. std::shared_ptr<Compactor> SharedCompactor() { return compactor_; }
  950. void SetCompactor(std::shared_ptr<Compactor> compactor) {
  951. // TODO(allauzen): is this correct? is this needed?
  952. // TODO(allauzen): consider removing and forcing this through direct calls
  953. // to compactor.
  954. compactor_ = std::move(compactor);
  955. }
  956. // Properties always true of this FST class.
  957. static constexpr uint64_t kStaticProperties = kExpanded;
  958. protected:
  959. template <class OtherArc, class OtherCompactor, class OtherCacheStore>
  960. explicit CompactFstImpl(
  961. const CompactFstImpl<OtherArc, OtherCompactor, OtherCacheStore> &impl)
  962. : compactor_(std::make_shared<Compactor>(*impl.GetCompactor())) {
  963. SetType(impl.Type());
  964. SetProperties(impl.Properties());
  965. SetInputSymbols(impl.InputSymbols());
  966. SetOutputSymbols(impl.OutputSymbols());
  967. }
  968. private:
  969. // For k*Version constants.
  970. template <class CompactArcFST, class FST>
  971. friend bool ::fst::WriteCompactArcFst(
  972. const FST &fst,
  973. const typename CompactArcFST::Compactor::ArcCompactor &arc_compactor,
  974. std::ostream &strm, const FstWriteOptions &opts);
  975. // Current unaligned file format version.
  976. static constexpr int kFileVersion = 2;
  977. // Current aligned file format version.
  978. static constexpr int kAlignedFileVersion = 1;
  979. // Minimum file format version supported.
  980. static constexpr int kMinFileVersion = 1;
  981. std::shared_ptr<Compactor> compactor_;
  982. typename Compactor::State state_;
  983. };
  984. // Returns the compactor for the CompactFst; intended to be called as
  985. // GetCompactor<CompactorType>(fst), which returns the compactor only if it
  986. // is of the specified type and otherwise nullptr (via the overload below).
  987. template <class Compactor, class Arc>
  988. const Compactor *GetCompactor(const CompactFst<Arc, Compactor> &fst) {
  989. return fst.GetCompactor();
  990. }
  991. template <class Compactor, class Arc>
  992. const Compactor *GetCompactor(const Fst<Arc> &fst) {
  993. return nullptr;
  994. }
  995. } // namespace internal
  996. // This class attaches interface to implementation and handles reference
  997. // counting, delegating most methods to ImplToExpandedFst.
  998. // (Template argument defaults are declared in fst-decl.h.)
  999. template <class A, class C, class CacheStore>
  1000. class CompactFst
  1001. : public ImplToExpandedFst<internal::CompactFstImpl<A, C, CacheStore>> {
  1002. public:
  1003. template <class F, class G>
  1004. void friend Cast(const F &, G *);
  1005. using Arc = A;
  1006. using StateId = typename Arc::StateId;
  1007. using Compactor = C;
  1008. using Impl = internal::CompactFstImpl<Arc, Compactor, CacheStore>;
  1009. using Store = CacheStore; // for CacheArcIterator
  1010. friend class StateIterator<CompactFst>;
  1011. friend class ArcIterator<CompactFst>;
  1012. CompactFst() : ImplToExpandedFst<Impl>(std::make_shared<Impl>()) {}
  1013. explicit CompactFst(const Fst<Arc> &fst,
  1014. const CompactFstOptions &opts = CompactFstOptions())
  1015. : CompactFst(fst, std::make_shared<Compactor>(fst), opts) {}
  1016. // Constructs a CompactFst, creating a new Compactor using
  1017. // Compactor(fst, compactor); this uses the compactor arg only for optional
  1018. // information, such as compression level. See the Compactor interface
  1019. // description.
  1020. CompactFst(const Fst<Arc> &fst, std::shared_ptr<Compactor> compactor,
  1021. const CompactFstOptions &opts = CompactFstOptions())
  1022. : ImplToExpandedFst<Impl>(
  1023. std::make_shared<Impl>(fst, std::move(compactor), opts)) {}
  1024. // Convenience constructor taking a Compactor rvalue ref. Avoids
  1025. // clutter of make_shared<Compactor> at call site.
  1026. // Constructs a CompactFst, creating a new Compactor using
  1027. // Compactor(fst, compactor); this uses the compactor arg only for optional
  1028. // information, such as compression level. See the Compactor interface
  1029. // description.
  1030. CompactFst(const Fst<Arc> &fst, Compactor &&compactor,
  1031. const CompactFstOptions &opts = CompactFstOptions())
  1032. : CompactFst(fst, std::make_shared<Compactor>(std::move(compactor)),
  1033. opts) {}
  1034. explicit CompactFst(std::shared_ptr<Compactor> compactor,
  1035. const CompactFstOptions &opts = CompactFstOptions())
  1036. : ImplToExpandedFst<Impl>(
  1037. std::make_shared<Impl>(std::move(compactor), opts)) {}
  1038. // See Fst<>::Copy() for doc.
  1039. CompactFst(const CompactFst &fst, bool safe = false)
  1040. : ImplToExpandedFst<Impl>(fst, safe) {}
  1041. // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
  1042. CompactFst *Copy(bool safe = false) const override {
  1043. return new CompactFst(*this, safe);
  1044. }
  1045. // Read a CompactFst from an input stream; return nullptr on error
  1046. static CompactFst *Read(std::istream &strm, const FstReadOptions &opts) {
  1047. auto *impl = Impl::Read(strm, opts);
  1048. return impl ? new CompactFst(std::shared_ptr<Impl>(impl)) : nullptr;
  1049. }
  1050. // Read a CompactFst from a file; return nullptr on error
  1051. // Empty source reads from standard input
  1052. static CompactFst *Read(std::string_view source) {
  1053. auto *impl = ImplToExpandedFst<Impl>::Read(source);
  1054. return impl ? new CompactFst(std::shared_ptr<Impl>(impl)) : nullptr;
  1055. }
  1056. bool Write(std::ostream &strm, const FstWriteOptions &opts) const override {
  1057. return GetImpl()->Write(strm, opts);
  1058. }
  1059. bool Write(const std::string &source) const override {
  1060. return Fst<Arc>::WriteFile(source);
  1061. }
  1062. void InitStateIterator(StateIteratorData<Arc> *data) const override {
  1063. GetImpl()->InitStateIterator(data);
  1064. }
  1065. void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
  1066. GetMutableImpl()->InitArcIterator(s, data);
  1067. }
  1068. MatcherBase<Arc> *InitMatcher(MatchType match_type) const override {
  1069. return new SortedMatcher<CompactFst>(*this, match_type);
  1070. }
  1071. const Compactor *GetCompactor() const { return GetImpl()->GetCompactor(); }
  1072. void SetCompactor(std::shared_ptr<Compactor> compactor) {
  1073. GetMutableImpl()->SetCompactor(std::move(compactor));
  1074. }
  1075. private:
  1076. using ImplToFst<Impl, ExpandedFst<Arc>>::GetImpl;
  1077. using ImplToFst<Impl, ExpandedFst<Arc>>::GetMutableImpl;
  1078. explicit CompactFst(std::shared_ptr<Impl> impl)
  1079. : ImplToExpandedFst<Impl>(std::move(impl)) {}
  1080. CompactFst &operator=(const CompactFst &fst) = delete;
  1081. };
  1082. // Writes FST in ArcCompacted format, with a possible pass over the machine
  1083. // before writing to compute the number of states and arcs.
  1084. template <class CompactArcFST, class FST>
  1085. bool WriteCompactArcFst(
  1086. const FST &fst,
  1087. const typename CompactArcFST::Compactor::ArcCompactor &arc_compactor,
  1088. std::ostream &strm, const FstWriteOptions &opts) {
  1089. using Arc = typename CompactArcFST::Arc;
  1090. using Compactor = typename CompactArcFST::Compactor;
  1091. using ArcCompactor = typename Compactor::ArcCompactor;
  1092. using CompactStore = typename Compactor::CompactStore;
  1093. using Element = typename ArcCompactor::Element;
  1094. using Impl = typename CompactArcFST::Impl;
  1095. using Unsigned = typename Compactor::Unsigned;
  1096. using Weight = typename Arc::Weight;
  1097. const auto file_version =
  1098. opts.align ? Impl::kAlignedFileVersion : Impl::kFileVersion;
  1099. size_t num_arcs = -1;
  1100. size_t num_states = -1;
  1101. auto first_pass_arc_compactor = arc_compactor;
  1102. // Note that GetCompactor will only return non-null if the compactor has the
  1103. // exact type Compactor == CompactArcFst::Compactor. This is what we want;
  1104. // other types must do an extra pass to set the arc compactor state.
  1105. if (const Compactor *const compactor =
  1106. internal::GetCompactor<Compactor>(fst)) {
  1107. num_arcs = compactor->NumArcs();
  1108. num_states = compactor->NumStates();
  1109. first_pass_arc_compactor = *compactor->GetArcCompactor();
  1110. } else {
  1111. // A first pass is needed to compute the state of the compactor, which
  1112. // is saved ahead of the rest of the data structures. This unfortunately
  1113. // means forcing a complete double compaction when writing in this format.
  1114. // TODO(allauzen): eliminate mutable state from compactors.
  1115. num_arcs = 0;
  1116. num_states = 0;
  1117. for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
  1118. const auto s = siter.Value();
  1119. ++num_states;
  1120. if (fst.Final(s) != Weight::Zero()) {
  1121. first_pass_arc_compactor.Compact(
  1122. s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
  1123. }
  1124. for (ArcIterator<FST> aiter(fst, s); !aiter.Done(); aiter.Next()) {
  1125. ++num_arcs;
  1126. first_pass_arc_compactor.Compact(s, aiter.Value());
  1127. }
  1128. }
  1129. }
  1130. FstHeader hdr;
  1131. hdr.SetStart(fst.Start());
  1132. hdr.SetNumStates(num_states);
  1133. hdr.SetNumArcs(num_arcs);
  1134. std::string type = "compact";
  1135. if (sizeof(Unsigned) != sizeof(uint32_t)) {
  1136. type += std::to_string(CHAR_BIT * sizeof(Unsigned));
  1137. }
  1138. type += "_";
  1139. type += ArcCompactor::Type();
  1140. if (CompactStore::Type() != "compact") {
  1141. type += "_";
  1142. type += CompactStore::Type();
  1143. }
  1144. const auto copy_properties = fst.Properties(kCopyProperties, true);
  1145. if ((copy_properties & kError) || !arc_compactor.Compatible(fst)) {
  1146. FSTERROR() << "Fst incompatible with compactor";
  1147. return false;
  1148. }
  1149. uint64_t properties = copy_properties | Impl::kStaticProperties;
  1150. internal::FstImpl<Arc>::WriteFstHeader(fst, strm, opts, file_version, type,
  1151. properties, &hdr);
  1152. first_pass_arc_compactor.Write(strm);
  1153. if (first_pass_arc_compactor.Size() == -1) {
  1154. if (opts.align && !AlignOutput(strm)) {
  1155. LOG(ERROR) << "WriteCompactArcFst: Alignment failed: " << opts.source;
  1156. return false;
  1157. }
  1158. Unsigned compacts = 0;
  1159. for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
  1160. const auto s = siter.Value();
  1161. strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
  1162. if (fst.Final(s) != Weight::Zero()) {
  1163. ++compacts;
  1164. }
  1165. compacts += fst.NumArcs(s);
  1166. }
  1167. strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
  1168. }
  1169. if (opts.align && !AlignOutput(strm)) {
  1170. LOG(ERROR) << "Could not align file during write after writing states";
  1171. }
  1172. const auto &second_pass_arc_compactor = arc_compactor;
  1173. Element element;
  1174. for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
  1175. const auto s = siter.Value();
  1176. if (fst.Final(s) != Weight::Zero()) {
  1177. element = second_pass_arc_compactor.Compact(
  1178. s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
  1179. strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
  1180. }
  1181. for (ArcIterator<FST> aiter(fst, s); !aiter.Done(); aiter.Next()) {
  1182. element = second_pass_arc_compactor.Compact(s, aiter.Value());
  1183. strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
  1184. }
  1185. }
  1186. strm.flush();
  1187. if (!strm) {
  1188. LOG(ERROR) << "WriteCompactArcFst: Write failed: " << opts.source;
  1189. return false;
  1190. }
  1191. return true;
  1192. }
  1193. // Specialization for CompactFst; see generic version in fst.h for sample
  1194. // usage (but use the CompactFst type!). This version should inline.
  1195. template <class Arc, class Compactor, class CacheStore>
  1196. class StateIterator<CompactFst<Arc, Compactor, CacheStore>> {
  1197. public:
  1198. using StateId = typename Arc::StateId;
  1199. explicit StateIterator(const CompactFst<Arc, Compactor, CacheStore> &fst)
  1200. : nstates_(fst.NumStates()), s_(0) {}
  1201. bool Done() const { return s_ >= nstates_; }
  1202. StateId Value() const { return s_; }
  1203. void Next() { ++s_; }
  1204. void Reset() { s_ = 0; }
  1205. private:
  1206. StateId nstates_;
  1207. StateId s_;
  1208. };
  1209. // Specialization for CompactFst. Never caches,
  1210. // always iterates over the underlying compact elements.
  1211. template <class Arc, class Compactor, class CacheStore>
  1212. class ArcIterator<CompactFst<Arc, Compactor, CacheStore>> {
  1213. public:
  1214. using StateId = typename Arc::StateId;
  1215. using State = typename Compactor::State;
  1216. ArcIterator(const CompactFst<Arc, Compactor, CacheStore> &fst, StateId s)
  1217. : state_(fst.GetMutableImpl()->MutableCompactor(), s),
  1218. pos_(0),
  1219. num_arcs_(state_.NumArcs()),
  1220. flags_(kArcValueFlags) {}
  1221. bool Done() const { return pos_ >= num_arcs_; }
  1222. const Arc &Value() const {
  1223. arc_ = state_.GetArc(pos_, flags_);
  1224. return arc_;
  1225. }
  1226. void Next() { ++pos_; }
  1227. size_t Position() const { return pos_; }
  1228. void Reset() { pos_ = 0; }
  1229. void Seek(size_t pos) { pos_ = pos; }
  1230. uint8_t Flags() const { return flags_; }
  1231. void SetFlags(uint8_t flags, uint8_t mask) {
  1232. flags_ &= ~mask;
  1233. flags_ |= (flags & kArcValueFlags);
  1234. }
  1235. private:
  1236. State state_;
  1237. size_t pos_;
  1238. // Cache the value of NumArcs(), since it is used in Done() and may be slow.
  1239. size_t num_arcs_;
  1240. mutable Arc arc_;
  1241. uint8_t flags_;
  1242. };
  1243. // ArcCompactor for unweighted string FSTs.
  1244. template <class A>
  1245. class StringCompactor {
  1246. public:
  1247. using Arc = A;
  1248. using Label = typename Arc::Label;
  1249. using StateId = typename Arc::StateId;
  1250. using Weight = typename Arc::Weight;
  1251. using Element = Label;
  1252. Element Compact(StateId s, const Arc &arc) const { return arc.ilabel; }
  1253. Arc Expand(StateId s, const Element &p,
  1254. uint8_t flags = kArcValueFlags) const {
  1255. return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId);
  1256. }
  1257. constexpr ssize_t Size() const { return 1; }
  1258. constexpr uint64_t Properties() const { return kCompiledStringProperties; }
  1259. bool Compatible(const Fst<Arc> &fst) const {
  1260. const auto props = Properties();
  1261. return fst.Properties(props, true) == props;
  1262. }
  1263. static const std::string &Type() {
  1264. static const std::string *const type = new std::string("string");
  1265. return *type;
  1266. }
  1267. bool Write(std::ostream &strm) const { return true; }
  1268. static StringCompactor *Read(std::istream &strm) {
  1269. return new StringCompactor;
  1270. }
  1271. };
  1272. // ArcCompactor for weighted string FSTs.
  1273. template <class A>
  1274. class WeightedStringCompactor {
  1275. public:
  1276. using Arc = A;
  1277. using Label = typename Arc::Label;
  1278. using StateId = typename Arc::StateId;
  1279. using Weight = typename Arc::Weight;
  1280. using Element = std::pair<Label, Weight>;
  1281. Element Compact(StateId s, const Arc &arc) const {
  1282. return std::make_pair(arc.ilabel, arc.weight);
  1283. }
  1284. Arc Expand(StateId s, const Element &p,
  1285. uint8_t flags = kArcValueFlags) const {
  1286. return Arc(p.first, p.first, p.second,
  1287. p.first != kNoLabel ? s + 1 : kNoStateId);
  1288. }
  1289. constexpr ssize_t Size() const { return 1; }
  1290. constexpr uint64_t Properties() const { return kString | kAcceptor; }
  1291. bool Compatible(const Fst<Arc> &fst) const {
  1292. const auto props = Properties();
  1293. return fst.Properties(props, true) == props;
  1294. }
  1295. static const std::string &Type() {
  1296. static const std::string *const type = new std::string("weighted_string");
  1297. return *type;
  1298. }
  1299. bool Write(std::ostream &strm) const { return true; }
  1300. static WeightedStringCompactor *Read(std::istream &strm) {
  1301. return new WeightedStringCompactor;
  1302. }
  1303. };
  1304. // ArcCompactor for unweighted acceptor FSTs.
  1305. template <class A>
  1306. class UnweightedAcceptorCompactor {
  1307. public:
  1308. using Arc = A;
  1309. using Label = typename Arc::Label;
  1310. using StateId = typename Arc::StateId;
  1311. using Weight = typename Arc::Weight;
  1312. using Element = std::pair<Label, StateId>;
  1313. Element Compact(StateId s, const Arc &arc) const {
  1314. return std::make_pair(arc.ilabel, arc.nextstate);
  1315. }
  1316. Arc Expand(StateId s, const Element &p,
  1317. uint8_t flags = kArcValueFlags) const {
  1318. return Arc(p.first, p.first, Weight::One(), p.second);
  1319. }
  1320. constexpr ssize_t Size() const { return -1; }
  1321. constexpr uint64_t Properties() const { return kAcceptor | kUnweighted; }
  1322. bool Compatible(const Fst<Arc> &fst) const {
  1323. const auto props = Properties();
  1324. return fst.Properties(props, true) == props;
  1325. }
  1326. static const std::string &Type() {
  1327. static const std::string *const type =
  1328. new std::string("unweighted_acceptor");
  1329. return *type;
  1330. }
  1331. bool Write(std::ostream &strm) const { return true; }
  1332. static UnweightedAcceptorCompactor *Read(std::istream &istrm) {
  1333. return new UnweightedAcceptorCompactor;
  1334. }
  1335. };
  1336. // ArcCompactor for weighted acceptor FSTs.
  1337. template <class A>
  1338. class AcceptorCompactor {
  1339. public:
  1340. using Arc = A;
  1341. using Label = typename Arc::Label;
  1342. using StateId = typename Arc::StateId;
  1343. using Weight = typename Arc::Weight;
  1344. using Element = std::pair<std::pair<Label, Weight>, StateId>;
  1345. Element Compact(StateId s, const Arc &arc) const {
  1346. return std::make_pair(std::make_pair(arc.ilabel, arc.weight),
  1347. arc.nextstate);
  1348. }
  1349. Arc Expand(StateId s, const Element &p,
  1350. uint8_t flags = kArcValueFlags) const {
  1351. return Arc(p.first.first, p.first.first, p.first.second, p.second);
  1352. }
  1353. constexpr ssize_t Size() const { return -1; }
  1354. constexpr uint64_t Properties() const { return kAcceptor; }
  1355. bool Compatible(const Fst<Arc> &fst) const {
  1356. const auto props = Properties();
  1357. return fst.Properties(props, true) == props;
  1358. }
  1359. static const std::string &Type() {
  1360. static const std::string *const type = new std::string("acceptor");
  1361. return *type;
  1362. }
  1363. bool Write(std::ostream &strm) const { return true; }
  1364. static AcceptorCompactor *Read(std::istream &strm) {
  1365. return new AcceptorCompactor;
  1366. }
  1367. };
  1368. // ArcCompactor for unweighted FSTs.
  1369. template <class A>
  1370. class UnweightedCompactor {
  1371. public:
  1372. using Arc = A;
  1373. using Label = typename Arc::Label;
  1374. using StateId = typename Arc::StateId;
  1375. using Weight = typename Arc::Weight;
  1376. using Element = std::pair<std::pair<Label, Label>, StateId>;
  1377. Element Compact(StateId s, const Arc &arc) const {
  1378. return std::make_pair(std::make_pair(arc.ilabel, arc.olabel),
  1379. arc.nextstate);
  1380. }
  1381. Arc Expand(StateId s, const Element &p,
  1382. uint8_t flags = kArcValueFlags) const {
  1383. return Arc(p.first.first, p.first.second, Weight::One(), p.second);
  1384. }
  1385. constexpr ssize_t Size() const { return -1; }
  1386. constexpr uint64_t Properties() const { return kUnweighted; }
  1387. bool Compatible(const Fst<Arc> &fst) const {
  1388. const auto props = Properties();
  1389. return fst.Properties(props, true) == props;
  1390. }
  1391. static const std::string &Type() {
  1392. static const std::string *const type = new std::string("unweighted");
  1393. return *type;
  1394. }
  1395. bool Write(std::ostream &strm) const { return true; }
  1396. static UnweightedCompactor *Read(std::istream &strm) {
  1397. return new UnweightedCompactor;
  1398. }
  1399. };
  1400. template <class Arc, class Unsigned /* = uint32_t */>
  1401. using CompactStringFst = CompactArcFst<Arc, StringCompactor<Arc>, Unsigned>;
  1402. template <class Arc, class Unsigned /* = uint32_t */>
  1403. using CompactWeightedStringFst =
  1404. CompactArcFst<Arc, WeightedStringCompactor<Arc>, Unsigned>;
  1405. template <class Arc, class Unsigned /* = uint32_t */>
  1406. using CompactAcceptorFst = CompactArcFst<Arc, AcceptorCompactor<Arc>, Unsigned>;
  1407. template <class Arc, class Unsigned /* = uint32_t */>
  1408. using CompactUnweightedFst =
  1409. CompactArcFst<Arc, UnweightedCompactor<Arc>, Unsigned>;
  1410. template <class Arc, class Unsigned /* = uint32_t */>
  1411. using CompactUnweightedAcceptorFst =
  1412. CompactArcFst<Arc, UnweightedAcceptorCompactor<Arc>, Unsigned>;
  1413. using StdCompactStringFst = CompactStringFst<StdArc, uint32_t>;
  1414. using StdCompactWeightedStringFst = CompactWeightedStringFst<StdArc, uint32_t>;
  1415. using StdCompactAcceptorFst = CompactAcceptorFst<StdArc, uint32_t>;
  1416. using StdCompactUnweightedFst = CompactUnweightedFst<StdArc, uint32_t>;
  1417. using StdCompactUnweightedAcceptorFst =
  1418. CompactUnweightedAcceptorFst<StdArc, uint32_t>;
  1419. // Convenience function to make a CompactStringFst from a sequence
  1420. // of Arc::Labels. LabelIterator must be an input iterator.
  1421. template <class Arc, class Unsigned = uint32_t, class LabelIterator>
  1422. inline CompactStringFst<Arc, Unsigned> MakeCompactStringFst(
  1423. const LabelIterator begin, const LabelIterator end) {
  1424. using CompactStringFst = CompactStringFst<Arc, Unsigned>;
  1425. using Compactor = typename CompactStringFst::Compactor;
  1426. return CompactStringFst(std::make_shared<Compactor>(begin, end));
  1427. }
  1428. template <class LabelIterator>
  1429. inline StdCompactStringFst MakeStdCompactStringFst(const LabelIterator begin,
  1430. const LabelIterator end) {
  1431. return MakeCompactStringFst<StdArc>(begin, end);
  1432. }
  1433. } // namespace fst
  1434. #endif // FST_COMPACT_FST_H_