You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

723 lines
27 KiB

  1. // Copyright 2005-2024 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the 'License');
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an 'AS IS' BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // See www.openfst.org for extensive documentation on this weighted
  16. // finite-state transducer library.
  17. //
  18. // An FST implementation that allows non-destructive edit operations on an
  19. // existing FST.
  20. //
  21. // The EditFst class enables non-destructive edit operations on a wrapped
  22. // ExpandedFst. The implementation uses copy-on-write semantics at the node
  23. // level: if a user has an underlying FST on which they want to perform a
  24. // relatively small number of edits (read: mutations), then this implementation
  25. // will copy the edited node to an internal MutableFst and perform any edits in
  26. // situ on that copied node. This class supports all the methods of MutableFst
  // except for DeleteStates(const std::vector<StateId> &); thus, new nodes may
  // also be added, and one may add transitions from existing nodes of the
  // wrapped FST to new nodes.
  31. //
  32. // N.B.: The documentation for Fst::Copy(true) says that its behavior is
  33. // undefined if invoked on an FST that has already been accessed. This class
  34. // requires that the Fst implementation it wraps provides consistent, reliable
  35. // behavior when its Copy(true) method is invoked, where consistent means
  // the graph structure, graph properties and state numbering do not change.
  37. // VectorFst and CompactFst, for example, are both well-behaved in this regard.
  38. #ifndef FST_EDIT_FST_H_
  39. #define FST_EDIT_FST_H_
  40. #include <cstddef>
  41. #include <cstdint>
  42. #include <istream>
  43. #include <memory>
  44. #include <ostream>
  45. #include <string>
  46. #include <vector>
  47. #include <fst/log.h>
  48. #include <fst/cache.h>
  49. #include <fst/expanded-fst.h>
  50. #include <fst/fst.h>
  51. #include <fst/impl-to-fst.h>
  52. #include <fst/mutable-fst.h>
  53. #include <fst/properties.h>
  54. #include <fst/util.h>
  55. #include <fst/vector-fst.h>
  56. #include <unordered_map>
  57. #include <string_view>
  58. namespace fst {
  59. namespace internal {
  60. // The EditFstData class is a container for all mutable data for EditFstImpl;
  61. // also, this class provides most of the actual implementation of what EditFst
  62. // does (that is, most of EditFstImpl's methods delegate to methods in this, the
  63. // EditFstData class). Instances of this class are reference-counted and can be
  64. // shared between otherwise independent EditFstImpl instances. This scheme
  65. // allows EditFstImpl to implement the thread-safe, copy-on-write semantics
  66. // required by Fst::Copy(true).
  67. //
  68. // template parameters:
  69. // A: the type of arc to use
  70. // WrappedFstT: the type of FST wrapped by the EditFst instance that
  71. // this EditFstData instance is backing
  72. // MutableFstT: the type of mutable FST to use internally for edited states;
  73. // crucially, MutableFstT::Copy(false) *must* yield an FST that is
  74. // thread-safe for reading (VectorFst, for example, has this property)
  75. template <typename Arc, typename WrappedFstT = ExpandedFst<Arc>,
  76. typename MutableFstT = VectorFst<Arc>>
  77. class EditFstData {
  78. public:
  79. using StateId = typename Arc::StateId;
  80. using Weight = typename Arc::Weight;
  81. EditFstData() : num_new_states_(0) {}
  82. EditFstData(const EditFstData &other)
  83. : edits_(other.edits_),
  84. external_to_internal_ids_(other.external_to_internal_ids_),
  85. edited_final_weights_(other.edited_final_weights_),
  86. num_new_states_(other.num_new_states_) {}
  87. ~EditFstData() = default;
  88. static EditFstData *Read(std::istream &strm, const FstReadOptions &opts);
  89. bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
  90. // Serializes all private data members of this class.
  91. FstWriteOptions edits_opts(opts);
  92. edits_opts.write_header = true; // Forces writing contained header.
  93. edits_.Write(strm, edits_opts);
  94. WriteType(strm, external_to_internal_ids_);
  95. WriteType(strm, edited_final_weights_);
  96. WriteType(strm, num_new_states_);
  97. if (!strm) {
  98. LOG(ERROR) << "EditFstData::Write: Write failed: " << opts.source;
  99. return false;
  100. }
  101. return true;
  102. }
  103. StateId NumNewStates() const { return num_new_states_; }
  104. // Accessor methods for the FST holding edited states.
  105. StateId EditedStart() const { return edits_.Start(); }
  106. Weight Final(StateId s, const WrappedFstT *wrapped) const {
  107. auto final_weight_it = GetFinalWeightIterator(s);
  108. if (final_weight_it == NotInFinalWeightMap()) {
  109. const auto it = GetEditedIdMapIterator(s);
  110. return it == NotInEditedMap() ? wrapped->Final(s)
  111. : edits_.Final(it->second);
  112. } else {
  113. return final_weight_it->second;
  114. }
  115. }
  116. size_t NumArcs(StateId s, const WrappedFstT *wrapped) const {
  117. const auto it = GetEditedIdMapIterator(s);
  118. return it == NotInEditedMap() ? wrapped->NumArcs(s)
  119. : edits_.NumArcs(it->second);
  120. }
  121. size_t NumInputEpsilons(StateId s, const WrappedFstT *wrapped) const {
  122. const auto it = GetEditedIdMapIterator(s);
  123. return it == NotInEditedMap() ? wrapped->NumInputEpsilons(s)
  124. : edits_.NumInputEpsilons(it->second);
  125. }
  126. size_t NumOutputEpsilons(StateId s, const WrappedFstT *wrapped) const {
  127. const auto it = GetEditedIdMapIterator(s);
  128. return it == NotInEditedMap() ? wrapped->NumOutputEpsilons(s)
  129. : edits_.NumOutputEpsilons(it->second);
  130. }
  131. void SetEditedProperties(uint64_t props, uint64_t mask) {
  132. edits_.SetProperties(props, mask);
  133. }
  134. // Non-const MutableFst operations.
  135. // Sets the start state for this FST.
  136. void SetStart(StateId s) { edits_.SetStart(s); }
  137. // Sets the final state for this FST.
  138. Weight SetFinal(StateId s, Weight weight, const WrappedFstT *wrapped) {
  139. const auto old_weight = Final(s, wrapped);
  140. const auto it = GetEditedIdMapIterator(s);
  141. // If we haven't already edited state s, don't add it to edited_ (which can
  142. // be expensive if s has many transitions); just use the
  143. // edited_final_weights_ map.
  144. if (it == NotInEditedMap()) {
  145. edited_final_weights_[s] = weight;
  146. } else {
  147. edits_.SetFinal(GetEditableInternalId(s, wrapped), weight);
  148. }
  149. return old_weight;
  150. }
  151. // Adds a new state to this FST.
  152. StateId AddState(StateId curr_num_states) {
  153. external_to_internal_ids_[curr_num_states] = edits_.AddState();
  154. ++num_new_states_;
  155. return curr_num_states;
  156. }
  157. // Adds new states to this FST.
  158. void AddStates(StateId curr_num_states, size_t n) {
  159. for (size_t i = 0; i < n; ++i) {
  160. curr_num_states = AddState(curr_num_states);
  161. }
  162. }
  163. // Adds the specified arc to the specified state of this FST.
  164. const Arc *AddArc(StateId s, const Arc &arc, const WrappedFstT *wrapped) {
  165. const auto internal_id = GetEditableInternalId(s, wrapped);
  166. const auto num_arcs = edits_.NumArcs(internal_id);
  167. ArcIterator<MutableFstT> arc_it(edits_, internal_id);
  168. const Arc *prev_arc = nullptr;
  169. if (num_arcs > 0) {
  170. // Grabs the final arc associated with this state in edits_.
  171. arc_it.Seek(num_arcs - 1);
  172. prev_arc = &(arc_it.Value());
  173. }
  174. edits_.AddArc(internal_id, arc);
  175. return prev_arc;
  176. }
  177. void DeleteStates() {
  178. edits_.DeleteStates();
  179. num_new_states_ = 0;
  180. external_to_internal_ids_.clear();
  181. edited_final_weights_.clear();
  182. }
  183. // Removes all but the first n outgoing arcs of the specified state.
  184. void DeleteArcs(StateId s, size_t n, const WrappedFstT *wrapped) {
  185. edits_.DeleteArcs(GetEditableInternalId(s, wrapped), n);
  186. }
  187. // Removes all outgoing arcs from the specified state.
  188. void DeleteArcs(StateId s, const WrappedFstT *wrapped) {
  189. edits_.DeleteArcs(GetEditableInternalId(s, wrapped));
  190. }
  191. // End methods for non-const MutableFst operations.
  192. // Provides information for the generic arc iterator.
  193. void InitArcIterator(StateId s, ArcIteratorData<Arc> *data,
  194. const WrappedFstT *wrapped) const {
  195. const auto it = GetEditedIdMapIterator(s);
  196. if (it == NotInEditedMap()) {
  197. VLOG(3) << "EditFstData::InitArcIterator: iterating on state " << s
  198. << " of original FST";
  199. wrapped->InitArcIterator(s, data);
  200. } else {
  201. VLOG(2) << "EditFstData::InitArcIterator: iterating on edited state " << s
  202. << " (internal state ID: " << it->second << ")";
  203. edits_.InitArcIterator(it->second, data);
  204. }
  205. }
  206. // Provides information for the generic mutable arc iterator.
  207. void InitMutableArcIterator(StateId s, MutableArcIteratorData<Arc> *data,
  208. const WrappedFstT *wrapped) {
  209. data->base = std::make_unique<MutableArcIterator<MutableFstT>>(
  210. &edits_, GetEditableInternalId(s, wrapped));
  211. }
  212. // Prints out the map from external to internal state IDs (for debugging
  213. // purposes).
  214. void PrintMap() {
  215. for (auto it = external_to_internal_ids_.begin(); it != NotInEditedMap();
  216. ++it) {
  217. LOG(INFO) << "(external,internal)=(" << it->first << "," << it->second
  218. << ")";
  219. }
  220. }
  221. private:
  222. // Returns the iterator of the map from external to internal state IDs
  223. // of edits_ for the specified external state IDs.
  224. typename std::unordered_map<StateId, StateId>::const_iterator
  225. GetEditedIdMapIterator(StateId s) const {
  226. return external_to_internal_ids_.find(s);
  227. }
  228. typename std::unordered_map<StateId, StateId>::const_iterator
  229. NotInEditedMap() const {
  230. return external_to_internal_ids_.end();
  231. }
  232. typename std::unordered_map<StateId, Weight>::const_iterator
  233. GetFinalWeightIterator(StateId s) const {
  234. return edited_final_weights_.find(s);
  235. }
  236. typename std::unordered_map<StateId, Weight>::const_iterator
  237. NotInFinalWeightMap() const {
  238. return edited_final_weights_.end();
  239. }
  240. // Returns the internal state ID of the specified external ID if the state has
  241. // already been made editable, or else copies the state from wrapped_ to
  242. // edits_ and returns the state ID of the newly editable state in edits_.
  243. StateId GetEditableInternalId(StateId s, const WrappedFstT *wrapped) {
  244. auto id_map_it = GetEditedIdMapIterator(s);
  245. if (id_map_it == NotInEditedMap()) {
  246. StateId new_internal_id = edits_.AddState();
  247. VLOG(2) << "EditFstData::GetEditableInternalId: editing state " << s
  248. << " of original FST; new internal state id:" << new_internal_id;
  249. external_to_internal_ids_[s] = new_internal_id;
  250. for (ArcIterator<Fst<Arc>> arc_iterator(*wrapped, s);
  251. !arc_iterator.Done(); arc_iterator.Next()) {
  252. edits_.AddArc(new_internal_id, arc_iterator.Value());
  253. }
  254. // Copies the final weight.
  255. auto final_weight_it = GetFinalWeightIterator(s);
  256. if (final_weight_it == NotInFinalWeightMap()) {
  257. edits_.SetFinal(new_internal_id, wrapped->Final(s));
  258. } else {
  259. edits_.SetFinal(new_internal_id, final_weight_it->second);
  260. edited_final_weights_.erase(s);
  261. }
  262. return new_internal_id;
  263. } else {
  264. return id_map_it->second;
  265. }
  266. }
  267. // A mutable FST (by default, a VectorFst) to contain new states, and/or
  268. // copies of states from a wrapped ExpandedFst that have been modified in
  269. // some way.
  270. MutableFstT edits_;
  271. // A mapping from external state IDs to the internal IDs of states that
  272. // appear in edits_.
  273. std::unordered_map<StateId, StateId> external_to_internal_ids_;
  274. // A mapping from external state IDs to final state weights assigned to
  275. // those states. The states in this map are *only* those whose final weight
  276. // has been modified; if any other part of the state has been modified,
  277. // the entire state is copied to edits_, and all modifications reside there.
  278. std::unordered_map<StateId, Weight> edited_final_weights_;
  279. // The number of new states added to this mutable FST impl, which is <= the
  280. // number of states in edits_ (since edits_ contains both edited *and* new
  281. // states).
  282. StateId num_new_states_;
  283. };
  284. // EditFstData method implementations: just the Read method.
  285. template <typename A, typename WrappedFstT, typename MutableFstT>
  286. EditFstData<A, WrappedFstT, MutableFstT> *
  287. EditFstData<A, WrappedFstT, MutableFstT>::Read(std::istream &strm,
  288. const FstReadOptions &opts) {
  289. auto data = fst::make_unique_for_overwrite<EditFstData>();
  290. // Next read in MutabelFstT machine that stores edits
  291. FstReadOptions edits_opts(opts);
  292. // Contained header was written out, so read it in.
  293. edits_opts.header = nullptr;
  294. // Because our internal representation of edited states is a solid object
  295. // of type MutableFstT (defaults to VectorFst<A>) and not a pointer,
  296. // and because the static Read method allocates a new object on the heap,
  297. // we need to call Read, check if there was a failure, use
  298. // MutableFstT::operator= to assign the object (not the pointer) to the
  299. // edits_ data member (which will increase the ref count by 1 on the impl)
  300. // and, finally, delete the heap-allocated object.
  301. std::unique_ptr<MutableFstT> edits(MutableFstT::Read(strm, edits_opts));
  302. if (!edits) return nullptr;
  303. data->edits_ = *edits;
  304. edits.reset();
  305. // Finally, reads in rest of private data members.
  306. ReadType(strm, &data->external_to_internal_ids_);
  307. ReadType(strm, &data->edited_final_weights_);
  308. ReadType(strm, &data->num_new_states_);
  309. if (!strm) {
  310. LOG(ERROR) << "EditFst::Read: read failed: " << opts.source;
  311. return nullptr;
  312. }
  313. return data.release();
  314. }
  315. // This class enables non-destructive edit operations on a wrapped ExpandedFst.
  316. // The implementation uses copy-on-write semantics at the node level: if a user
  317. // has an underlying FST on which they want to perform a relatively small
  318. // number of edits (read: mutations), then this implementation will copy the
  319. // edited node to an internal MutableFst and perform any edits in situ on that
  320. // copied node. This class supports all the methods of MutableFst except for
  321. // DeleteStates(const std::vector<StateId> &); thus, new nodes may also be
  322. // added, and
  323. // one may add transitions from existing nodes of the wrapped FST to new nodes.
  324. //
  325. // template parameters:
  326. // A: the type of arc to use
  327. // WrappedFstT: the type of FST wrapped by the EditFst instance that
  328. // this EditFstImpl instance is backing
  329. // MutableFstT: the type of mutable FST to use internally for edited states;
  330. // crucially, MutableFstT::Copy(false) must yield an FST that is
  331. // thread-safe for reading (VectorFst, for example, has this property)
  332. template <typename A, typename WrappedFstT = ExpandedFst<A>,
  333. typename MutableFstT = VectorFst<A>>
  334. class EditFstImpl : public FstImpl<A> {
  335. public:
  336. using Arc = A;
  337. using StateId = typename Arc::StateId;
  338. using Weight = typename Arc::Weight;
  339. using FstImpl<Arc>::SetProperties;
  340. using FstImpl<Arc>::SetInputSymbols;
  341. using FstImpl<Arc>::SetOutputSymbols;
  342. using FstImpl<Arc>::WriteHeader;
  343. // Constructs an editable FST implementation with no states. Effectively, this
  344. // initially-empty FST will in every way mimic the behavior of a
  345. // VectorFst---more precisely, a VectorFstImpl instance---but with slightly
  346. // slower performance (by a constant factor), due to the fact that
  347. // this class maintains a mapping between external state id's and
  348. // their internal equivalents.
  349. EditFstImpl() : wrapped_(new MutableFstT()) {
  350. FstImpl<Arc>::SetType("edit");
  351. InheritPropertiesFromWrapped();
  352. data_ = std::make_shared<EditFstData<Arc, WrappedFstT, MutableFstT>>();
  353. }
  354. // Wraps the specified ExpandedFst. This constructor requires that the
  355. // specified Fst is an ExpandedFst instance. This requirement is only enforced
  356. // at runtime. (See below for the reason.)
  357. //
  358. // This library uses the pointer-to-implementation or "PIMPL" design pattern.
  359. // In particular, to make it convenient to bind an implementation class to its
  360. // interface, there are a pair of template "binder" classes, one for immutable
  361. // and one for mutable FSTs (ImplToFst and ImplToMutableFst, respectively).
  362. // As it happens, the API for the ImplToMutableFst<I,F> class requires that
  363. // the implementation class--the template parameter "I"--have a constructor
  364. // taking a const Fst<A> reference. Accordingly, the constructor here must
  365. // perform a down_cast to the WrappedFstT type required by EditFst and
  366. // therefore EditFstImpl.
  367. explicit EditFstImpl(const Fst<Arc> &wrapped)
  368. : wrapped_(down_cast<WrappedFstT *>(wrapped.Copy())) {
  369. FstImpl<Arc>::SetType("edit");
  370. data_ = std::make_shared<EditFstData<Arc, WrappedFstT, MutableFstT>>();
  371. // have edits_ inherit all properties from wrapped_
  372. data_->SetEditedProperties(wrapped_->Properties(kFstProperties, false),
  373. kFstProperties);
  374. InheritPropertiesFromWrapped();
  375. }
  376. // A copy constructor for this implementation class, used to implement
  377. // the Copy() method of the Fst interface.
  378. EditFstImpl(const EditFstImpl &impl)
  379. : FstImpl<Arc>(),
  380. wrapped_(down_cast<WrappedFstT *>(impl.wrapped_->Copy(true))),
  381. data_(impl.data_) {
  382. SetProperties(impl.Properties());
  383. }
  384. // const Fst/ExpandedFst operations, declared in the Fst and ExpandedFst
  385. // interfaces
  386. StateId Start() const {
  387. const auto edited_start = data_->EditedStart();
  388. return edited_start == kNoStateId ? wrapped_->Start() : edited_start;
  389. }
  390. Weight Final(StateId s) const { return data_->Final(s, wrapped_.get()); }
  391. size_t NumArcs(StateId s) const { return data_->NumArcs(s, wrapped_.get()); }
  392. size_t NumInputEpsilons(StateId s) const {
  393. return data_->NumInputEpsilons(s, wrapped_.get());
  394. }
  395. size_t NumOutputEpsilons(StateId s) const {
  396. return data_->NumOutputEpsilons(s, wrapped_.get());
  397. }
  398. StateId NumStates() const {
  399. return wrapped_->NumStates() + data_->NumNewStates();
  400. }
  401. static EditFstImpl *Read(std::istream &strm, const FstReadOptions &opts);
  402. bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
  403. FstHeader hdr;
  404. hdr.SetStart(Start());
  405. hdr.SetNumStates(NumStates());
  406. FstWriteOptions header_opts(opts);
  407. // Allows the contained FST to hold any symbols.
  408. header_opts.write_isymbols = false;
  409. header_opts.write_osymbols = false;
  410. WriteHeader(strm, header_opts, kFileVersion, &hdr);
  411. // Serializes the wrapped FST to stream.
  412. FstWriteOptions wrapped_opts(opts);
  413. // Forces writing the contained header.
  414. wrapped_opts.write_header = true;
  415. wrapped_->Write(strm, wrapped_opts);
  416. data_->Write(strm, opts);
  417. strm.flush();
  418. if (!strm) {
  419. LOG(ERROR) << "EditFst::Write: Write failed: " << opts.source;
  420. return false;
  421. }
  422. return true;
  423. }
  424. // Sets the start state for this FST.
  425. void SetStart(StateId s) {
  426. MutateCheck();
  427. data_->SetStart(s);
  428. SetProperties(SetStartProperties(FstImpl<Arc>::Properties()));
  429. }
  430. // Sets the final state for this FST.
  431. void SetFinal(StateId s, Weight weight) {
  432. MutateCheck();
  433. Weight old_weight = data_->SetFinal(s, weight, wrapped_.get());
  434. SetProperties(
  435. SetFinalProperties(FstImpl<Arc>::Properties(), old_weight, weight));
  436. }
  437. // Adds a new state to this FST.
  438. StateId AddState() {
  439. MutateCheck();
  440. SetProperties(AddStateProperties(FstImpl<Arc>::Properties()));
  441. return data_->AddState(NumStates());
  442. }
  443. // Adds new states to this FST.
  444. void AddStates(size_t n) {
  445. MutateCheck();
  446. SetProperties(AddStateProperties(FstImpl<Arc>::Properties()));
  447. return data_->AddStates(NumStates(), n);
  448. }
  449. // Adds the specified arc to the specified state of this FST.
  450. void AddArc(StateId s, const Arc &arc) {
  451. MutateCheck();
  452. const auto *prev_arc = data_->AddArc(s, arc, wrapped_.get());
  453. SetProperties(
  454. AddArcProperties(FstImpl<Arc>::Properties(), s, arc, prev_arc));
  455. }
  456. void DeleteStates(const std::vector<StateId> &dstates) {
  457. FSTERROR() << ": EditFstImpl::DeleteStates(const std::vector<StateId>&): "
  458. << " not implemented";
  459. SetProperties(kError, kError);
  460. }
  461. // Deletes all states in this FST.
  462. void DeleteStates();
  463. // Removes all but the first n outgoing arcs of the specified state.
  464. void DeleteArcs(StateId s, size_t n) {
  465. MutateCheck();
  466. data_->DeleteArcs(s, n, wrapped_.get());
  467. SetProperties(DeleteArcsProperties(FstImpl<Arc>::Properties()));
  468. }
  469. // Removes all outgoing arcs from the specified state.
  470. void DeleteArcs(StateId s) {
  471. MutateCheck();
  472. data_->DeleteArcs(s, wrapped_.get());
  473. SetProperties(DeleteArcsProperties(FstImpl<Arc>::Properties()));
  474. }
  475. void ReserveStates(StateId s) {}
  476. void ReserveArcs(StateId s, size_t n) {}
  477. // Ends non-const MutableFst operations.
  478. // Provides information for the generic state iterator.
  479. void InitStateIterator(StateIteratorData<Arc> *data) const {
  480. data->base = nullptr;
  481. data->nstates = NumStates();
  482. }
  483. // Provides information for the generic arc iterator.
  484. void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
  485. data_->InitArcIterator(s, data, wrapped_.get());
  486. }
  487. // Provides information for the generic mutable arc iterator.
  488. void InitMutableArcIterator(StateId s, MutableArcIteratorData<Arc> *data) {
  489. MutateCheck();
  490. data_->InitMutableArcIterator(s, data, wrapped_.get());
  491. }
  492. private:
  493. // Properties always true of this FST class.
  494. static constexpr uint64_t kStaticProperties = kExpanded | kMutable;
  495. // Current file format version.
  496. static constexpr int kFileVersion = 2;
  497. // Minimum file format version supported
  498. static constexpr int kMinFileVersion = 2;
  499. // Causes this FST to inherit all the properties from its wrapped FST, except
  500. // for the two properties that always apply to EditFst instances: kExpanded
  501. // and kMutable.
  502. void InheritPropertiesFromWrapped() {
  503. SetProperties(wrapped_->Properties(kCopyProperties, false) |
  504. kStaticProperties);
  505. SetInputSymbols(wrapped_->InputSymbols());
  506. SetOutputSymbols(wrapped_->OutputSymbols());
  507. }
  508. // This method ensures that any operations that alter the mutable data
  509. // portion of this EditFstImpl cause the data_ member to be copied when its
  510. // reference count is greater than 1. Note that this method is distinct from
  511. // MutableFst::Mutate, which gets invoked whenever one of the basic mutation
  512. // methods defined in MutableFst is invoked, such as SetInputSymbols.
  513. // The MutateCheck here in EditFstImpl is invoked whenever one of the
  514. // mutating methods specifically related to the types of edits provided
  515. // by EditFst is performed, such as changing an arc of an existing state
  516. // of the wrapped FST via a MutableArcIterator, or adding a new state via
  517. // AddState().
  518. void MutateCheck() {
  519. if (!data_.unique()) {
  520. data_ =
  521. std::make_shared<EditFstData<Arc, WrappedFstT, MutableFstT>>(*data_);
  522. }
  523. }
  524. // The FST that this FST wraps. The purpose of this class is to enable
  525. // non-destructive edits on this wrapped FST.
  526. std::unique_ptr<const WrappedFstT> wrapped_;
  527. // The mutable data for this EditFst instance, with delegates for all the
  528. // methods that can mutate data.
  529. std::shared_ptr<EditFstData<Arc, WrappedFstT, MutableFstT>> data_;
  530. };
  531. template <typename Arc, typename WrappedFstT, typename MutableFstT>
  532. inline void EditFstImpl<Arc, WrappedFstT, MutableFstT>::DeleteStates() {
  533. data_->DeleteStates();
  534. // we are deleting all states, so just forget about pointer to wrapped_
  535. // and do what default constructor does: set wrapped_ to a new VectorFst
  536. wrapped_ = std::make_unique<MutableFstT>();
  537. const auto new_props =
  538. DeleteAllStatesProperties(FstImpl<Arc>::Properties(), kStaticProperties);
  539. FstImpl<Arc>::SetProperties(new_props);
  540. }
  541. template <typename Arc, typename WrappedFstT, typename MutableFstT>
  542. EditFstImpl<Arc, WrappedFstT, MutableFstT> *
  543. EditFstImpl<Arc, WrappedFstT, MutableFstT>::Read(std::istream &strm,
  544. const FstReadOptions &opts) {
  545. auto impl = std::make_unique<EditFstImpl>();
  546. FstHeader hdr;
  547. if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return nullptr;
  548. impl->SetStart(hdr.Start());
  549. // Reads in wrapped FST.
  550. FstReadOptions wrapped_opts(opts);
  551. // Contained header was written out, so reads it in too.
  552. wrapped_opts.header = nullptr;
  553. std::unique_ptr<Fst<Arc>> wrapped_fst(Fst<Arc>::Read(strm, wrapped_opts));
  554. if (!wrapped_fst) return nullptr;
  555. impl->wrapped_.reset(down_cast<WrappedFstT *>(wrapped_fst.release()));
  556. impl->data_ = std::shared_ptr<EditFstData<Arc, WrappedFstT, MutableFstT>>(
  557. EditFstData<Arc, WrappedFstT, MutableFstT>::Read(strm, opts));
  558. if (!impl->data_) return nullptr;
  559. return impl.release();
  560. }
  561. } // namespace internal
  562. // Concrete, editable FST. This class attaches interface to implementation.
  563. //
  564. // EditFst is thread-compatible.
  565. template <typename A, typename WrappedFstT = ExpandedFst<A>,
  566. typename MutableFstT = VectorFst<A>>
  567. class EditFst : public ImplToMutableFst<
  568. internal::EditFstImpl<A, WrappedFstT, MutableFstT>> {
  569. public:
  570. using Arc = A;
  571. using StateId = typename Arc::StateId;
  572. using Impl = internal::EditFstImpl<Arc, WrappedFstT, MutableFstT>;
  573. friend class MutableArcIterator<EditFst<Arc, WrappedFstT, MutableFstT>>;
  574. EditFst() : ImplToMutableFst<Impl>(std::make_shared<Impl>()) {}
  575. explicit EditFst(const Fst<Arc> &fst)
  576. : ImplToMutableFst<Impl>(std::make_shared<Impl>(fst)) {}
  577. explicit EditFst(const WrappedFstT &fst)
  578. : ImplToMutableFst<Impl>(std::make_shared<Impl>(fst)) {}
  579. // See Fst<>::Copy() for doc.
  580. EditFst(const EditFst &fst, bool safe = false)
  581. : ImplToMutableFst<Impl>(fst, safe) {}
  582. ~EditFst() override = default;
  583. // Gets a copy of this EditFst. See Fst<>::Copy() for further doc.
  584. EditFst *Copy(bool safe = false) const override {
  585. return new EditFst(*this, safe);
  586. }
  587. EditFst &operator=(const EditFst &fst) {
  588. SetImpl(fst.GetSharedImpl());
  589. return *this;
  590. }
  591. EditFst &operator=(const Fst<Arc> &fst) override {
  592. SetImpl(std::make_shared<Impl>(fst));
  593. return *this;
  594. }
  595. // Reads an EditFst from an input stream, returning nullptr on error.
  596. static EditFst *Read(std::istream &strm, const FstReadOptions &opts) {
  597. auto *impl = Impl::Read(strm, opts);
  598. return impl ? new EditFst(std::shared_ptr<Impl>(impl)) : nullptr;
  599. }
  600. // Reads an EditFst from a file, returning nullptr on error. If the source
  601. // argument is an empty string, it reads from standard input.
  602. static EditFst *Read(std::string_view source) {
  603. auto *impl = ImplToExpandedFst<Impl, MutableFst<Arc>>::Read(source);
  604. return impl ? new EditFst(std::shared_ptr<Impl>(impl)) : nullptr;
  605. }
  606. bool Write(std::ostream &strm, const FstWriteOptions &opts) const override {
  607. return GetImpl()->Write(strm, opts);
  608. }
  609. bool Write(const std::string &source) const override {
  610. return Fst<Arc>::WriteFile(source);
  611. }
  612. void InitStateIterator(StateIteratorData<Arc> *data) const override {
  613. GetImpl()->InitStateIterator(data);
  614. }
  615. void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
  616. GetImpl()->InitArcIterator(s, data);
  617. }
  618. void InitMutableArcIterator(StateId s,
  619. MutableArcIteratorData<A> *data) override {
  620. GetMutableImpl()->InitMutableArcIterator(s, data);
  621. }
  622. private:
  623. explicit EditFst(std::shared_ptr<Impl> impl) : ImplToMutableFst<Impl>(impl) {}
  624. using ImplToFst<Impl, MutableFst<Arc>>::GetImpl;
  625. using ImplToFst<Impl, MutableFst<Arc>>::GetMutableImpl;
  626. using ImplToFst<Impl, MutableFst<Arc>>::SetImpl;
  627. };
  628. } // namespace fst
  629. #endif // FST_EDIT_FST_H_