// Copyright 2005-2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the 'License');
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an 'AS IS' BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// See www.openfst.org for extensive documentation on this weighted
// finite-state transducer library.
//
// An FST implementation that allows non-destructive edit operations on an
// existing FST.
//
// The EditFst class enables non-destructive edit operations on a wrapped
// ExpandedFst. The implementation uses copy-on-write semantics at the node
// level: if a user has an underlying FST on which they want to perform a
// relatively small number of edits (read: mutations), then this implementation
// will copy the edited node to an internal MutableFst and perform any edits in
// situ on that copied node. This class supports all the methods of MutableFst
// except for DeleteStates(const std::vector<StateId> &); thus, new nodes may
// also be added, and one may add transitions from existing nodes of the
// wrapped FST to new nodes.
//
// N.B.: The documentation for Fst::Copy(true) says that its behavior is
// undefined if invoked on an FST that has already been accessed. This class
// requires that the Fst implementation it wraps provides consistent, reliable
// behavior when its Copy(true) method is invoked, where consistent means
// the graph structure, graph properties and state numbering do not change.
// VectorFst and CompactFst, for example, are both well-behaved in this regard.
#ifndef FST_EDIT_FST_H_ #define FST_EDIT_FST_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace fst { namespace internal { // The EditFstData class is a container for all mutable data for EditFstImpl; // also, this class provides most of the actual implementation of what EditFst // does (that is, most of EditFstImpl's methods delegate to methods in this, the // EditFstData class). Instances of this class are reference-counted and can be // shared between otherwise independent EditFstImpl instances. This scheme // allows EditFstImpl to implement the thread-safe, copy-on-write semantics // required by Fst::Copy(true). // // template parameters: // A: the type of arc to use // WrappedFstT: the type of FST wrapped by the EditFst instance that // this EditFstData instance is backing // MutableFstT: the type of mutable FST to use internally for edited states; // crucially, MutableFstT::Copy(false) *must* yield an FST that is // thread-safe for reading (VectorFst, for example, has this property) template , typename MutableFstT = VectorFst> class EditFstData { public: using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; EditFstData() : num_new_states_(0) {} EditFstData(const EditFstData &other) : edits_(other.edits_), external_to_internal_ids_(other.external_to_internal_ids_), edited_final_weights_(other.edited_final_weights_), num_new_states_(other.num_new_states_) {} ~EditFstData() = default; static EditFstData *Read(std::istream &strm, const FstReadOptions &opts); bool Write(std::ostream &strm, const FstWriteOptions &opts) const { // Serializes all private data members of this class. FstWriteOptions edits_opts(opts); edits_opts.write_header = true; // Forces writing contained header. 
edits_.Write(strm, edits_opts); WriteType(strm, external_to_internal_ids_); WriteType(strm, edited_final_weights_); WriteType(strm, num_new_states_); if (!strm) { LOG(ERROR) << "EditFstData::Write: Write failed: " << opts.source; return false; } return true; } StateId NumNewStates() const { return num_new_states_; } // Accessor methods for the FST holding edited states. StateId EditedStart() const { return edits_.Start(); } Weight Final(StateId s, const WrappedFstT *wrapped) const { auto final_weight_it = GetFinalWeightIterator(s); if (final_weight_it == NotInFinalWeightMap()) { const auto it = GetEditedIdMapIterator(s); return it == NotInEditedMap() ? wrapped->Final(s) : edits_.Final(it->second); } else { return final_weight_it->second; } } size_t NumArcs(StateId s, const WrappedFstT *wrapped) const { const auto it = GetEditedIdMapIterator(s); return it == NotInEditedMap() ? wrapped->NumArcs(s) : edits_.NumArcs(it->second); } size_t NumInputEpsilons(StateId s, const WrappedFstT *wrapped) const { const auto it = GetEditedIdMapIterator(s); return it == NotInEditedMap() ? wrapped->NumInputEpsilons(s) : edits_.NumInputEpsilons(it->second); } size_t NumOutputEpsilons(StateId s, const WrappedFstT *wrapped) const { const auto it = GetEditedIdMapIterator(s); return it == NotInEditedMap() ? wrapped->NumOutputEpsilons(s) : edits_.NumOutputEpsilons(it->second); } void SetEditedProperties(uint64_t props, uint64_t mask) { edits_.SetProperties(props, mask); } // Non-const MutableFst operations. // Sets the start state for this FST. void SetStart(StateId s) { edits_.SetStart(s); } // Sets the final state for this FST. Weight SetFinal(StateId s, Weight weight, const WrappedFstT *wrapped) { const auto old_weight = Final(s, wrapped); const auto it = GetEditedIdMapIterator(s); // If we haven't already edited state s, don't add it to edited_ (which can // be expensive if s has many transitions); just use the // edited_final_weights_ map. 
if (it == NotInEditedMap()) { edited_final_weights_[s] = weight; } else { edits_.SetFinal(GetEditableInternalId(s, wrapped), weight); } return old_weight; } // Adds a new state to this FST. StateId AddState(StateId curr_num_states) { external_to_internal_ids_[curr_num_states] = edits_.AddState(); ++num_new_states_; return curr_num_states; } // Adds new states to this FST. void AddStates(StateId curr_num_states, size_t n) { for (size_t i = 0; i < n; ++i) { curr_num_states = AddState(curr_num_states); } } // Adds the specified arc to the specified state of this FST. const Arc *AddArc(StateId s, const Arc &arc, const WrappedFstT *wrapped) { const auto internal_id = GetEditableInternalId(s, wrapped); const auto num_arcs = edits_.NumArcs(internal_id); ArcIterator arc_it(edits_, internal_id); const Arc *prev_arc = nullptr; if (num_arcs > 0) { // Grabs the final arc associated with this state in edits_. arc_it.Seek(num_arcs - 1); prev_arc = &(arc_it.Value()); } edits_.AddArc(internal_id, arc); return prev_arc; } void DeleteStates() { edits_.DeleteStates(); num_new_states_ = 0; external_to_internal_ids_.clear(); edited_final_weights_.clear(); } // Removes all but the first n outgoing arcs of the specified state. void DeleteArcs(StateId s, size_t n, const WrappedFstT *wrapped) { edits_.DeleteArcs(GetEditableInternalId(s, wrapped), n); } // Removes all outgoing arcs from the specified state. void DeleteArcs(StateId s, const WrappedFstT *wrapped) { edits_.DeleteArcs(GetEditableInternalId(s, wrapped)); } // End methods for non-const MutableFst operations. // Provides information for the generic arc iterator. 
void InitArcIterator(StateId s, ArcIteratorData *data, const WrappedFstT *wrapped) const { const auto it = GetEditedIdMapIterator(s); if (it == NotInEditedMap()) { VLOG(3) << "EditFstData::InitArcIterator: iterating on state " << s << " of original FST"; wrapped->InitArcIterator(s, data); } else { VLOG(2) << "EditFstData::InitArcIterator: iterating on edited state " << s << " (internal state ID: " << it->second << ")"; edits_.InitArcIterator(it->second, data); } } // Provides information for the generic mutable arc iterator. void InitMutableArcIterator(StateId s, MutableArcIteratorData *data, const WrappedFstT *wrapped) { data->base = std::make_unique>( &edits_, GetEditableInternalId(s, wrapped)); } // Prints out the map from external to internal state IDs (for debugging // purposes). void PrintMap() { for (auto it = external_to_internal_ids_.begin(); it != NotInEditedMap(); ++it) { LOG(INFO) << "(external,internal)=(" << it->first << "," << it->second << ")"; } } private: // Returns the iterator of the map from external to internal state IDs // of edits_ for the specified external state IDs. typename std::unordered_map::const_iterator GetEditedIdMapIterator(StateId s) const { return external_to_internal_ids_.find(s); } typename std::unordered_map::const_iterator NotInEditedMap() const { return external_to_internal_ids_.end(); } typename std::unordered_map::const_iterator GetFinalWeightIterator(StateId s) const { return edited_final_weights_.find(s); } typename std::unordered_map::const_iterator NotInFinalWeightMap() const { return edited_final_weights_.end(); } // Returns the internal state ID of the specified external ID if the state has // already been made editable, or else copies the state from wrapped_ to // edits_ and returns the state ID of the newly editable state in edits_. 
StateId GetEditableInternalId(StateId s, const WrappedFstT *wrapped) { auto id_map_it = GetEditedIdMapIterator(s); if (id_map_it == NotInEditedMap()) { StateId new_internal_id = edits_.AddState(); VLOG(2) << "EditFstData::GetEditableInternalId: editing state " << s << " of original FST; new internal state id:" << new_internal_id; external_to_internal_ids_[s] = new_internal_id; for (ArcIterator> arc_iterator(*wrapped, s); !arc_iterator.Done(); arc_iterator.Next()) { edits_.AddArc(new_internal_id, arc_iterator.Value()); } // Copies the final weight. auto final_weight_it = GetFinalWeightIterator(s); if (final_weight_it == NotInFinalWeightMap()) { edits_.SetFinal(new_internal_id, wrapped->Final(s)); } else { edits_.SetFinal(new_internal_id, final_weight_it->second); edited_final_weights_.erase(s); } return new_internal_id; } else { return id_map_it->second; } } // A mutable FST (by default, a VectorFst) to contain new states, and/or // copies of states from a wrapped ExpandedFst that have been modified in // some way. MutableFstT edits_; // A mapping from external state IDs to the internal IDs of states that // appear in edits_. std::unordered_map external_to_internal_ids_; // A mapping from external state IDs to final state weights assigned to // those states. The states in this map are *only* those whose final weight // has been modified; if any other part of the state has been modified, // the entire state is copied to edits_, and all modifications reside there. std::unordered_map edited_final_weights_; // The number of new states added to this mutable FST impl, which is <= the // number of states in edits_ (since edits_ contains both edited *and* new // states). StateId num_new_states_; }; // EditFstData method implementations: just the Read method. 
template EditFstData * EditFstData::Read(std::istream &strm, const FstReadOptions &opts) { auto data = fst::make_unique_for_overwrite(); // Next read in MutabelFstT machine that stores edits FstReadOptions edits_opts(opts); // Contained header was written out, so read it in. edits_opts.header = nullptr; // Because our internal representation of edited states is a solid object // of type MutableFstT (defaults to VectorFst) and not a pointer, // and because the static Read method allocates a new object on the heap, // we need to call Read, check if there was a failure, use // MutableFstT::operator= to assign the object (not the pointer) to the // edits_ data member (which will increase the ref count by 1 on the impl) // and, finally, delete the heap-allocated object. std::unique_ptr edits(MutableFstT::Read(strm, edits_opts)); if (!edits) return nullptr; data->edits_ = *edits; edits.reset(); // Finally, reads in rest of private data members. ReadType(strm, &data->external_to_internal_ids_); ReadType(strm, &data->edited_final_weights_); ReadType(strm, &data->num_new_states_); if (!strm) { LOG(ERROR) << "EditFst::Read: read failed: " << opts.source; return nullptr; } return data.release(); } // This class enables non-destructive edit operations on a wrapped ExpandedFst. // The implementation uses copy-on-write semantics at the node level: if a user // has an underlying FST on which they want to perform a relatively small // number of edits (read: mutations), then this implementation will copy the // edited node to an internal MutableFst and perform any edits in situ on that // copied node. This class supports all the methods of MutableFst except for // DeleteStates(const std::vector &); thus, new nodes may also be // added, and // one may add transitions from existing nodes of the wrapped FST to new nodes. 
// // template parameters: // A: the type of arc to use // WrappedFstT: the type of FST wrapped by the EditFst instance that // this EditFstImpl instance is backing // MutableFstT: the type of mutable FST to use internally for edited states; // crucially, MutableFstT::Copy(false) must yield an FST that is // thread-safe for reading (VectorFst, for example, has this property) template , typename MutableFstT = VectorFst> class EditFstImpl : public FstImpl { public: using Arc = A; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; using FstImpl::SetProperties; using FstImpl::SetInputSymbols; using FstImpl::SetOutputSymbols; using FstImpl::WriteHeader; // Constructs an editable FST implementation with no states. Effectively, this // initially-empty FST will in every way mimic the behavior of a // VectorFst---more precisely, a VectorFstImpl instance---but with slightly // slower performance (by a constant factor), due to the fact that // this class maintains a mapping between external state id's and // their internal equivalents. EditFstImpl() : wrapped_(new MutableFstT()) { FstImpl::SetType("edit"); InheritPropertiesFromWrapped(); data_ = std::make_shared>(); } // Wraps the specified ExpandedFst. This constructor requires that the // specified Fst is an ExpandedFst instance. This requirement is only enforced // at runtime. (See below for the reason.) // // This library uses the pointer-to-implementation or "PIMPL" design pattern. // In particular, to make it convenient to bind an implementation class to its // interface, there are a pair of template "binder" classes, one for immutable // and one for mutable FSTs (ImplToFst and ImplToMutableFst, respectively). // As it happens, the API for the ImplToMutableFst class requires that // the implementation class--the template parameter "I"--have a constructor // taking a const Fst reference. 
Accordingly, the constructor here must // perform a down_cast to the WrappedFstT type required by EditFst and // therefore EditFstImpl. explicit EditFstImpl(const Fst &wrapped) : wrapped_(down_cast(wrapped.Copy())) { FstImpl::SetType("edit"); data_ = std::make_shared>(); // have edits_ inherit all properties from wrapped_ data_->SetEditedProperties(wrapped_->Properties(kFstProperties, false), kFstProperties); InheritPropertiesFromWrapped(); } // A copy constructor for this implementation class, used to implement // the Copy() method of the Fst interface. EditFstImpl(const EditFstImpl &impl) : FstImpl(), wrapped_(down_cast(impl.wrapped_->Copy(true))), data_(impl.data_) { SetProperties(impl.Properties()); } // const Fst/ExpandedFst operations, declared in the Fst and ExpandedFst // interfaces StateId Start() const { const auto edited_start = data_->EditedStart(); return edited_start == kNoStateId ? wrapped_->Start() : edited_start; } Weight Final(StateId s) const { return data_->Final(s, wrapped_.get()); } size_t NumArcs(StateId s) const { return data_->NumArcs(s, wrapped_.get()); } size_t NumInputEpsilons(StateId s) const { return data_->NumInputEpsilons(s, wrapped_.get()); } size_t NumOutputEpsilons(StateId s) const { return data_->NumOutputEpsilons(s, wrapped_.get()); } StateId NumStates() const { return wrapped_->NumStates() + data_->NumNewStates(); } static EditFstImpl *Read(std::istream &strm, const FstReadOptions &opts); bool Write(std::ostream &strm, const FstWriteOptions &opts) const { FstHeader hdr; hdr.SetStart(Start()); hdr.SetNumStates(NumStates()); FstWriteOptions header_opts(opts); // Allows the contained FST to hold any symbols. header_opts.write_isymbols = false; header_opts.write_osymbols = false; WriteHeader(strm, header_opts, kFileVersion, &hdr); // Serializes the wrapped FST to stream. FstWriteOptions wrapped_opts(opts); // Forces writing the contained header. 
wrapped_opts.write_header = true; wrapped_->Write(strm, wrapped_opts); data_->Write(strm, opts); strm.flush(); if (!strm) { LOG(ERROR) << "EditFst::Write: Write failed: " << opts.source; return false; } return true; } // Sets the start state for this FST. void SetStart(StateId s) { MutateCheck(); data_->SetStart(s); SetProperties(SetStartProperties(FstImpl::Properties())); } // Sets the final state for this FST. void SetFinal(StateId s, Weight weight) { MutateCheck(); Weight old_weight = data_->SetFinal(s, weight, wrapped_.get()); SetProperties( SetFinalProperties(FstImpl::Properties(), old_weight, weight)); } // Adds a new state to this FST. StateId AddState() { MutateCheck(); SetProperties(AddStateProperties(FstImpl::Properties())); return data_->AddState(NumStates()); } // Adds new states to this FST. void AddStates(size_t n) { MutateCheck(); SetProperties(AddStateProperties(FstImpl::Properties())); return data_->AddStates(NumStates(), n); } // Adds the specified arc to the specified state of this FST. void AddArc(StateId s, const Arc &arc) { MutateCheck(); const auto *prev_arc = data_->AddArc(s, arc, wrapped_.get()); SetProperties( AddArcProperties(FstImpl::Properties(), s, arc, prev_arc)); } void DeleteStates(const std::vector &dstates) { FSTERROR() << ": EditFstImpl::DeleteStates(const std::vector&): " << " not implemented"; SetProperties(kError, kError); } // Deletes all states in this FST. void DeleteStates(); // Removes all but the first n outgoing arcs of the specified state. void DeleteArcs(StateId s, size_t n) { MutateCheck(); data_->DeleteArcs(s, n, wrapped_.get()); SetProperties(DeleteArcsProperties(FstImpl::Properties())); } // Removes all outgoing arcs from the specified state. void DeleteArcs(StateId s) { MutateCheck(); data_->DeleteArcs(s, wrapped_.get()); SetProperties(DeleteArcsProperties(FstImpl::Properties())); } void ReserveStates(StateId s) {} void ReserveArcs(StateId s, size_t n) {} // Ends non-const MutableFst operations. 
// Provides information for the generic state iterator. void InitStateIterator(StateIteratorData *data) const { data->base = nullptr; data->nstates = NumStates(); } // Provides information for the generic arc iterator. void InitArcIterator(StateId s, ArcIteratorData *data) const { data_->InitArcIterator(s, data, wrapped_.get()); } // Provides information for the generic mutable arc iterator. void InitMutableArcIterator(StateId s, MutableArcIteratorData *data) { MutateCheck(); data_->InitMutableArcIterator(s, data, wrapped_.get()); } private: // Properties always true of this FST class. static constexpr uint64_t kStaticProperties = kExpanded | kMutable; // Current file format version. static constexpr int kFileVersion = 2; // Minimum file format version supported static constexpr int kMinFileVersion = 2; // Causes this FST to inherit all the properties from its wrapped FST, except // for the two properties that always apply to EditFst instances: kExpanded // and kMutable. void InheritPropertiesFromWrapped() { SetProperties(wrapped_->Properties(kCopyProperties, false) | kStaticProperties); SetInputSymbols(wrapped_->InputSymbols()); SetOutputSymbols(wrapped_->OutputSymbols()); } // This method ensures that any operations that alter the mutable data // portion of this EditFstImpl cause the data_ member to be copied when its // reference count is greater than 1. Note that this method is distinct from // MutableFst::Mutate, which gets invoked whenever one of the basic mutation // methods defined in MutableFst is invoked, such as SetInputSymbols. // The MutateCheck here in EditFstImpl is invoked whenever one of the // mutating methods specifically related to the types of edits provided // by EditFst is performed, such as changing an arc of an existing state // of the wrapped FST via a MutableArcIterator, or adding a new state via // AddState(). void MutateCheck() { if (!data_.unique()) { data_ = std::make_shared>(*data_); } } // The FST that this FST wraps. 
The purpose of this class is to enable // non-destructive edits on this wrapped FST. std::unique_ptr wrapped_; // The mutable data for this EditFst instance, with delegates for all the // methods that can mutate data. std::shared_ptr> data_; }; template inline void EditFstImpl::DeleteStates() { data_->DeleteStates(); // we are deleting all states, so just forget about pointer to wrapped_ // and do what default constructor does: set wrapped_ to a new VectorFst wrapped_ = std::make_unique(); const auto new_props = DeleteAllStatesProperties(FstImpl::Properties(), kStaticProperties); FstImpl::SetProperties(new_props); } template EditFstImpl * EditFstImpl::Read(std::istream &strm, const FstReadOptions &opts) { auto impl = std::make_unique(); FstHeader hdr; if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return nullptr; impl->SetStart(hdr.Start()); // Reads in wrapped FST. FstReadOptions wrapped_opts(opts); // Contained header was written out, so reads it in too. wrapped_opts.header = nullptr; std::unique_ptr> wrapped_fst(Fst::Read(strm, wrapped_opts)); if (!wrapped_fst) return nullptr; impl->wrapped_.reset(down_cast(wrapped_fst.release())); impl->data_ = std::shared_ptr>( EditFstData::Read(strm, opts)); if (!impl->data_) return nullptr; return impl.release(); } } // namespace internal // Concrete, editable FST. This class attaches interface to implementation. // // EditFst is thread-compatible. template , typename MutableFstT = VectorFst> class EditFst : public ImplToMutableFst< internal::EditFstImpl> { public: using Arc = A; using StateId = typename Arc::StateId; using Impl = internal::EditFstImpl; friend class MutableArcIterator>; EditFst() : ImplToMutableFst(std::make_shared()) {} explicit EditFst(const Fst &fst) : ImplToMutableFst(std::make_shared(fst)) {} explicit EditFst(const WrappedFstT &fst) : ImplToMutableFst(std::make_shared(fst)) {} // See Fst<>::Copy() for doc. 
EditFst(const EditFst &fst, bool safe = false) : ImplToMutableFst(fst, safe) {} ~EditFst() override = default; // Gets a copy of this EditFst. See Fst<>::Copy() for further doc. EditFst *Copy(bool safe = false) const override { return new EditFst(*this, safe); } EditFst &operator=(const EditFst &fst) { SetImpl(fst.GetSharedImpl()); return *this; } EditFst &operator=(const Fst &fst) override { SetImpl(std::make_shared(fst)); return *this; } // Reads an EditFst from an input stream, returning nullptr on error. static EditFst *Read(std::istream &strm, const FstReadOptions &opts) { auto *impl = Impl::Read(strm, opts); return impl ? new EditFst(std::shared_ptr(impl)) : nullptr; } // Reads an EditFst from a file, returning nullptr on error. If the source // argument is an empty string, it reads from standard input. static EditFst *Read(std::string_view source) { auto *impl = ImplToExpandedFst>::Read(source); return impl ? new EditFst(std::shared_ptr(impl)) : nullptr; } bool Write(std::ostream &strm, const FstWriteOptions &opts) const override { return GetImpl()->Write(strm, opts); } bool Write(const std::string &source) const override { return Fst::WriteFile(source); } void InitStateIterator(StateIteratorData *data) const override { GetImpl()->InitStateIterator(data); } void InitArcIterator(StateId s, ArcIteratorData *data) const override { GetImpl()->InitArcIterator(s, data); } void InitMutableArcIterator(StateId s, MutableArcIteratorData *data) override { GetMutableImpl()->InitMutableArcIterator(s, data); } private: explicit EditFst(std::shared_ptr impl) : ImplToMutableFst(impl) {} using ImplToFst>::GetImpl; using ImplToFst>::GetMutableImpl; using ImplToFst>::SetImpl; }; } // namespace fst #endif // FST_EDIT_FST_H_