You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

1646 lines
57 KiB

// Copyright 2005-2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the 'License');
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an 'AS IS' BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// See www.openfst.org for extensive documentation on this weighted
// finite-state transducer library.
//
// FST Class for memory-efficient representation of common types of
// FSTs: linear automata, acceptors, unweighted FSTs, ...
#ifndef FST_COMPACT_FST_H_
#define FST_COMPACT_FST_H_
#include <sys/types.h>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <istream>
#include <iterator>
#include <memory>
#include <ostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include <fst/log.h>
#include <fst/arc.h>
#include <fst/cache.h>
#include <fst/expanded-fst.h>
#include <fst/fst-decl.h> // For optional argument declarations
#include <fst/fst.h>
#include <fst/impl-to-fst.h>
#include <fst/mapped-file.h>
#include <fst/matcher.h>
#include <fst/properties.h>
#include <fst/test-properties.h>
#include <fst/util.h>
#include <string_view>
namespace fst {
// Cache options for CompactFst.
struct CompactFstOptions : CacheOptions {
  // By default nothing is cached: most compactors are cheap to evaluate, so
  // skipping the cache saves memory.
  CompactFstOptions() : CacheOptions(true, 0) {}

  // Uses the caller-supplied cache options as they are.
  explicit CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
};
// New (Fst) Compactor interface - used by CompactFst. This interface
// allows complete flexibility in how the compaction is accomplished.
//
// class Compactor {
// public:
// // Constructor from the Fst to be compacted. If compactor is present,
// // only optional state should be copied from it. Examples of this
// // optional state include compression level or ArcCompactors.
// explicit Compactor(const Fst<Arc> &fst,
// shared_ptr<Compactor> compactor = nullptr);
// // Copy constructor. Must make a thread-safe copy suitable for use
// // by Fst::Copy(/*safe=*/true). Only thread-unsafe data structures
// // need to be deeply copied. Ideally, this constructor is O(1) and any
// // large structures are thread-safe and shared, while small ones may
// // need to be copied.
// Compactor(const Compactor &compactor);
// // Default constructor (optional, see comment below).
// Compactor();
//
// // Returns the start state, number of states, and total number of arcs
// // of the compacted Fst
// StateId Start() const;
// StateId NumStates() const;
// size_t NumArcs() const;
//
// // Accessor class for state attributes.
// class State {
// public:
// State(); // Required, corresponds to kNoStateId.
// // This constructor may, of course, also take a const Compactor *
// // for the first argument. It is recommended to use const Compactor *
// // if possible, but this can be Compactor * if necessary.
// State(Compactor *c, StateId s); // Accessor for StateId 's'.
// StateId GetStateId() const;
// Weight Final() const;
// size_t NumArcs() const;
// // Gets the 'i'th arc for the state. Requires i < NumArcs().
// // Flags are a bitmask of the kArc*Value flags that ArcIterator uses.
// Arc GetArc(size_t i, uint8_t flags) const;
// };
//
// // Modifies 'state' accessor to provide access to state id 's'.
// void SetState(StateId s, State *state);
//
// // Tests whether 'fst' can be compacted by this compactor.
// template <typename A>
// bool IsCompatible(const Fst<A> &fst) const;
//
// // Returns the properties that always hold when an FST with the
// // specified properties is compacted using this compactor.
// // This function should clear bits for properties that no longer
// // hold and set those for properties that are known to hold.
// uint64_t Properties(uint64_t props) const;
//
// // Returns a string identifying the type of compactor.
// static const std::string &Type();
//
// // Returns true if an error has occurred.
// bool Error() const;
//
// // Writes a compactor to a file.
// bool Write(std::ostream &strm, const FstWriteOptions &opts) const;
//
// // Reads a compactor from a file.
// static Compactor *Read(std::istream &strm, const FstReadOptions &opts,
// const FstHeader &hdr);
// };
//
// Old ArcCompactor Interface:
//
// This interface is not deprecated; it, along with CompactArcStore and
// other Stores that implement its interface, is simply more constrained
// by essentially forcing the implementation to use an index array
// and an arc array, but giving flexibility in how those are implemented.
// This interface may still be useful and more convenient if that is the
// desired representation.
//
// The ArcCompactor class determines how arcs and final weights are compacted
// and expanded.
//
// Final weights are treated as transitions to the superfinal state, i.e.,
// ilabel = olabel = kNoLabel and nextstate = kNoStateId.
//
// There are two types of compactors:
//
// * Fixed out-degree compactors: 'compactor.Size()' returns a positive integer
// 's'. An FST can be compacted by this compactor only if each state has
// exactly 's' outgoing transitions (counting a non-Zero() final weight as a
// transition). A typical example is a compactor for string FSTs, i.e.,
// 's == 1'.
//
// * Variable out-degree compactors: 'compactor.Size() == -1'. There are no
// out-degree restrictions for these compactors.
//
// Interface:
//
// class ArcCompactor {
// public:
// // Default constructor (optional, see comment below).
// ArcCompactor();
//
// // Copy constructor. Must make a thread-safe copy suitable for use
// // by Fst::Copy(/*safe=*/true). Only thread-unsafe data structures
// // need to be deeply copied.
// ArcCompactor(const ArcCompactor &);
//
// // Element is the type of the compacted transitions.
// using Element = ...
//
// // Returns the compacted representation of a transition 'arc'
// // at a state 's'.
// Element Compact(StateId s, const Arc &arc);
//
// // Returns the transition at state 's' represented by the compacted
// // transition 'e'.
// Arc Expand(StateId s, const Element &e) const;
//
// // Returns -1 for variable out-degree compactors, and the mandatory
// // out-degree otherwise.
// ssize_t Size() const;
//
// // Tests whether an FST can be compacted by this compactor.
// bool Compatible(const Fst<A> &fst) const;
//
// // Returns the properties that are always true for an FST compacted using
// // this compactor. Any Fst with the inverse of these properties should
// // be incompatible.
// uint64_t Properties() const;
//
// // Returns a string identifying the type of compactor.
// static const std::string &Type();
//
// // Writes a compactor to a file.
// bool Write(std::ostream &strm) const;
//
// // Reads a compactor from a file.
// static ArcCompactor *Read(std::istream &strm);
// };
//
// The default constructor is only required for FST_REGISTER to work (i.e.,
// enabling Convert() and the command-line utilities to work with this new
// compactor). However, a default constructor always needs to be specified for
// this code to compile, but one can have it simply raise an error when called,
// like so:
//
// Compactor::Compactor() {
// FSTERROR() << "Compactor: No default constructor";
// }
// Default implementation data for CompactArcCompactor. Only old-style
// ArcCompactors are supported because the CompactArcStore constructors
// use the old API.
//
// CompactArcStore is thread-compatible, but not thread-safe.
// The copy constructor makes a thread-safe copy.
//
// The implementation contains two arrays: 'states_' and 'compacts_'.
//
// For fixed out-degree compactors, the 'states_' array is unallocated. The
// 'compacts_' array contains the compacted transitions. Its size is
// 'ncompacts_'. The outgoing transitions at a given state are stored
// consecutively. For a given state 's', its 'compactor.Size()' outgoing
// transitions (including a superfinal transition when 's' is final), are stored
// in positions ['s*compactor.Size()', '(s+1)*compactor.Size()').
//
// For variable out-degree compactors, the states_ array has size
// 'nstates_ + 1' and contains positions in the 'compacts_' array. For a
// given state 's', the compacted transitions of 's' are stored in positions
// ['states_[s]', 'states_[s + 1]') in 'compacts_'. By convention,
// 'states_[nstates_] == ncompacts_'.
//
// In both cases, the superfinal transitions (when 's' is final, i.e.,
// 'Final(s) != Weight::Zero()') are stored first.
//
// The unsigned type 'Unsigned' is used to represent indices into the
// 'compacts_' array.
template <class Element, class Unsigned>
class CompactArcStore {
 public:
  // Creates an empty store: no states, no compacted elements, no start state.
  CompactArcStore() = default;

  // Copies share the backing memory regions with the original instead of
  // duplicating them, which makes copying O(1) and yields a thread-safe copy.
  CompactArcStore(const CompactArcStore &) = default;

  // Compacts every state of 'fst' with 'arc_compactor'. Error() reports
  // whether the FST could be stored.
  template <class Arc, class ArcCompactor>
  CompactArcStore(const Fst<Arc> &fst, const ArcCompactor &arc_compactor);

  // Builds the store from the already-compacted elements in [begin, end).
  // Error() reports whether the input was well-formed.
  template <class Iterator, class ArcCompactor>
  CompactArcStore(const Iterator begin, const Iterator end,
                  const ArcCompactor &arc_compactor);

  ~CompactArcStore() = default;

  // Reads a store from 'strm' using the counts recorded in 'hdr'. Returns a
  // newly allocated store owned by the caller, or nullptr on failure.
  template <class ArcCompactor>
  static CompactArcStore *Read(std::istream &strm, const FstReadOptions &opts,
                               const FstHeader &hdr,
                               const ArcCompactor &arc_compactor);

  // Writes the index array (if any) followed by the compacted elements to
  // 'strm'. Returns false on failure.
  bool Write(std::ostream &strm, const FstWriteOptions &opts) const;

  // Index in 'compacts_' at which the transitions of state 'i' begin (see the
  // class-level comment). Only meaningful for stores built with a variable
  // out-degree compactor. Requires 0 <= i <= NumStates(); by convention
  // States(NumStates()) == NumCompacts().
  Unsigned States(ssize_t i) const { return states_[i]; }

  // Compacted element stored at position 'i' (see the class-level comment).
  // Requires 0 <= i < NumCompacts().
  const Element &Compacts(size_t i) const { return compacts_[i]; }

  // Number of states.
  size_t NumStates() const { return nstates_; }

  // Number of compacted elements (arcs plus superfinal transitions).
  size_t NumCompacts() const { return ncompacts_; }

  // Number of arcs, not counting superfinal transitions.
  size_t NumArcs() const { return narcs_; }

  // Start state, or kNoStateId if there is none.
  ssize_t Start() const { return start_; }

  // True if construction detected an error.
  bool Error() const { return error_; }

  // Name identifying this kind of data storage container.
  static const std::string &Type();

 private:
  // Memory regions backing the two arrays below.
  std::shared_ptr<MappedFile> states_region_;
  std::shared_ptr<MappedFile> compacts_region_;
  // Points into states_region_; not owned. Stays null when no index array
  // is used (fixed out-degree compactors).
  Unsigned *states_ = nullptr;
  // Points into compacts_region_; not owned.
  Element *compacts_ = nullptr;
  size_t nstates_ = 0;
  size_t ncompacts_ = 0;
  size_t narcs_ = 0;
  ssize_t start_ = kNoStateId;
  bool error_ = false;
};
// Builds the store by compacting every state of 'fst' with 'arc_compactor'.
//
// On failure Error() is set and construction stops early. Failure happens
// when the FST does not match a fixed out-degree compactor, or when a
// variable out-degree compactor would need indices larger than 'Unsigned'
// can represent.
template <class Element, class Unsigned>
template <class Arc, class ArcCompactor>
CompactArcStore<Element, Unsigned>::CompactArcStore(
    const Fst<Arc> &fst, const ArcCompactor &arc_compactor) {
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  // Loop-invariant: -1 means variable out-degree, any other value is the
  // out-degree every state must have.
  const auto out_degree = arc_compactor.Size();
  const bool variable_out_degree = out_degree == -1;
  start_ = fst.Start();
  // Counts # of states, arcs and final states.
  StateId nfinals = 0;
  for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
    ++nstates_;
    const auto s = siter.Value();
    narcs_ += fst.NumArcs(s);
    if (fst.Final(s) != Weight::Zero()) ++nfinals;
  }
  if (variable_out_degree) {
    ncompacts_ = narcs_ + nfinals;
    // Every entry of 'states_', including the sentinel 'states_[nstates_]',
    // is a position in 'compacts_' stored as Unsigned. Refuse to build a store
    // whose positions would be silently truncated.
    if (static_cast<size_t>(static_cast<Unsigned>(ncompacts_)) != ncompacts_) {
      FSTERROR() << "CompactArcStore: Number of compacted elements exceeds "
                 << "the range of the index type";
      error_ = true;
      return;
    }
    states_region_ = fst::WrapUnique(MappedFile::Allocate(
        sizeof(states_[0]) * (nstates_ + 1), alignof(decltype(states_[0]))));
    states_ = static_cast<Unsigned *>(states_region_->mutable_data());
    states_[nstates_] = ncompacts_;
  } else {
    states_ = nullptr;
    ncompacts_ = nstates_ * out_degree;
    // Each state must contribute exactly 'out_degree' elements in total.
    if ((narcs_ + nfinals) != ncompacts_) {
      FSTERROR() << "CompactArcStore: ArcCompactor incompatible with FST";
      error_ = true;
      return;
    }
  }
  compacts_region_ = fst::WrapUnique(MappedFile::Allocate(
      sizeof(compacts_[0]) * ncompacts_, alignof(decltype(compacts_[0]))));
  compacts_ = static_cast<Element *>(compacts_region_->mutable_data());
  // Fills 'compacts_' (and 'states_' if present) state by state.
  size_t pos = 0;
  for (size_t s = 0; s < nstates_; ++s) {
    const size_t first_pos = pos;
    if (variable_out_degree) states_[s] = pos;
    // The superfinal transition, if any, is stored before the arcs.
    if (fst.Final(s) != Weight::Zero()) {
      compacts_[pos++] = arc_compactor.Compact(
          s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
    }
    for (ArcIterator<Fst<Arc>> aiter(fst, s); !aiter.Done(); aiter.Next()) {
      compacts_[pos++] = arc_compactor.Compact(s, aiter.Value());
    }
    // The global count matched above, but each individual state must match
    // the fixed out-degree as well.
    if (!variable_out_degree && (pos != first_pos + out_degree)) {
      FSTERROR() << "CompactArcStore: ArcCompactor incompatible with FST";
      error_ = true;
      return;
    }
  }
  if (pos != ncompacts_) {
    FSTERROR() << "CompactArcStore: ArcCompactor incompatible with FST";
    error_ = true;
    return;
  }
}
// Builds the store directly from the already-compacted elements in
// [begin, end), starting with the elements of the initial state.
//
// Fixed out-degree compactors: the elements are copied as they are. For
// strings (out-degree 1) a final element with weight One() is appended when
// the input is empty or does not end in a superfinal transition.
//
// Variable out-degree compactors: each state contributes a superfinal element
// (ilabel == kNoLabel) followed by its arcs. Superfinal elements with weight
// Zero() mark a state boundary but are not stored.
//
// The iterators only need to be multi-pass forward iterators. On failure
// Error() is set and construction stops early.
template <class Element, class Unsigned>
template <class Iterator, class ArcCompactor>
CompactArcStore<Element, Unsigned>::CompactArcStore(
    const Iterator begin, const Iterator end,
    const ArcCompactor &arc_compactor) {
  using Arc = typename ArcCompactor::Arc;
  using Weight = typename Arc::Weight;
  // Loop-invariant: -1 means variable out-degree.
  const auto out_degree = arc_compactor.Size();
  if (out_degree != -1) {
    ncompacts_ = std::distance(begin, end);
    if (out_degree == 1) {
      // For strings, allows implicit final weight. Empty input is the empty
      // string.
      if (ncompacts_ == 0) {
        ++ncompacts_;
      } else {
        // std::next instead of 'begin + n' so that non-random-access
        // iterators work too.
        using Difference =
            typename std::iterator_traits<Iterator>::difference_type;
        const auto last =
            std::next(begin, static_cast<Difference>(ncompacts_ - 1));
        const auto arc = arc_compactor.Expand(ncompacts_ - 1, *last);
        if (arc.ilabel != kNoLabel) ++ncompacts_;
      }
    }
    if (ncompacts_ % out_degree) {
      FSTERROR() << "CompactArcStore: Size of input container incompatible"
                 << " with arc compactor";
      error_ = true;
      return;
    }
    if (ncompacts_ == 0) return;
    start_ = 0;
    nstates_ = ncompacts_ / out_degree;
    compacts_region_ = fst::WrapUnique(MappedFile::Allocate(
        sizeof(compacts_[0]) * ncompacts_, alignof(decltype(compacts_[0]))));
    compacts_ = static_cast<Element *>(compacts_region_->mutable_data());
    size_t i = 0;
    Iterator it = begin;
    for (; it != end; ++it, ++i) {
      compacts_[i] = *it;
      if (arc_compactor.Expand(i, *it).ilabel != kNoLabel) ++narcs_;
    }
    // Appends the implicit final element reserved above, if any.
    if (i < ncompacts_) {
      compacts_[i] = arc_compactor.Compact(
          i, Arc(kNoLabel, kNoLabel, Weight::One(), kNoStateId));
    }
  } else {
    if (std::distance(begin, end) == 0) return;
    // Count # of states, arcs and compacts.
    auto it = begin;
    for (size_t i = 0; it != end; ++it, ++i) {
      const auto arc = arc_compactor.Expand(i, *it);
      if (arc.ilabel != kNoLabel) {
        ++narcs_;
        ++ncompacts_;
      } else {
        ++nstates_;
        if (arc.weight != Weight::Zero()) ++ncompacts_;
      }
    }
    // Positions in 'compacts_' are stored as Unsigned in 'states_'. Refuse to
    // build a store whose positions would be silently truncated.
    if (static_cast<size_t>(static_cast<Unsigned>(ncompacts_)) != ncompacts_) {
      FSTERROR() << "CompactArcStore: Number of compacted elements exceeds "
                 << "the range of the index type";
      error_ = true;
      return;
    }
    start_ = 0;
    compacts_region_ = fst::WrapUnique(MappedFile::Allocate(
        sizeof(compacts_[0]) * ncompacts_, alignof(decltype(compacts_[0]))));
    compacts_ = static_cast<Element *>(compacts_region_->mutable_data());
    states_region_ = fst::WrapUnique(MappedFile::Allocate(
        sizeof(states_[0]) * (nstates_ + 1), alignof(decltype(states_[0]))));
    states_ = static_cast<Unsigned *>(states_region_->mutable_data());
    states_[nstates_] = ncompacts_;
    // Second pass: 'i' is the next free position in 'compacts_', 's' the
    // next state to be given a starting position.
    size_t i = 0;
    size_t s = 0;
    for (it = begin; it != end; ++it) {
      const auto arc = arc_compactor.Expand(i, *it);
      if (arc.ilabel != kNoLabel) {
        compacts_[i++] = *it;
      } else {
        states_[s++] = i;
        if (arc.weight != Weight::Zero()) compacts_[i++] = *it;
      }
    }
    if ((s != nstates_) || (i != ncompacts_)) {
      FSTERROR() << "CompactArcStore: Ill-formed input container";
      error_ = true;
      return;
    }
  }
}
// Reads a store from 'strm'. The start state and the state/arc counts come
// from 'hdr'; the index array (variable out-degree compactors only) and the
// compacted elements are then mapped or read from the stream, in that order.
// Returns a heap-allocated store owned by the caller, or nullptr on failure.
template <class Element, class Unsigned>
template <class ArcCompactor>
CompactArcStore<Element, Unsigned> *CompactArcStore<Element, Unsigned>::Read(
    std::istream &strm, const FstReadOptions &opts, const FstHeader &hdr,
    const ArcCompactor &arc_compactor) {
  auto store = std::make_unique<CompactArcStore>();
  store->start_ = hdr.Start();
  store->nstates_ = hdr.NumStates();
  store->narcs_ = hdr.NumArcs();

  const bool memory_map = opts.mode == FstReadOptions::MAP;
  // Calls AlignInput on the stream if the header says the file is aligned;
  // logs and reports false when that fails.
  const auto align_input = [&strm, &hdr, &opts]() {
    if (!(hdr.GetFlags() & FstHeader::IS_ALIGNED) || AlignInput(strm)) {
      return true;
    }
    LOG(ERROR) << "CompactArcStore::Read: Alignment failed: " << opts.source;
    return false;
  };

  if (arc_compactor.Size() == -1) {
    // Variable out-degree: an index array of nstates_ + 1 entries comes first.
    if (!align_input()) return nullptr;
    const auto states_bytes = (store->nstates_ + 1) * sizeof(Unsigned);
    store->states_region_.reset(
        MappedFile::Map(strm, memory_map, opts.source, states_bytes));
    if (!strm || !store->states_region_) {
      LOG(ERROR) << "CompactArcStore::Read: Read failed: " << opts.source;
      return nullptr;
    }
    store->states_ =
        static_cast<Unsigned *>(store->states_region_->mutable_data());
    // The last index entry holds the total number of compacted elements.
    store->ncompacts_ = store->states_[store->nstates_];
  } else {
    // Fixed out-degree: no index array, the element count is implied.
    store->states_ = nullptr;
    store->ncompacts_ = store->nstates_ * arc_compactor.Size();
  }

  if (!align_input()) return nullptr;
  const size_t compacts_bytes = store->ncompacts_ * sizeof(Element);
  store->compacts_region_.reset(
      MappedFile::Map(strm, memory_map, opts.source, compacts_bytes));
  if (!strm || !store->compacts_region_) {
    LOG(ERROR) << "CompactArcStore::Read: Read failed: " << opts.source;
    return nullptr;
  }
  store->compacts_ =
      static_cast<Element *>(store->compacts_region_->mutable_data());
  return store.release();
}
// Writes the store to 'strm': first the index array of nstates_ + 1 entries
// (only if this store has one), then the compacted elements. Each array is
// preceded by an alignment step when 'opts.align' is set. Returns false and
// logs an error if alignment or writing fails.
template <class Element, class Unsigned>
bool CompactArcStore<Element, Unsigned>::Write(
    std::ostream &strm, const FstWriteOptions &opts) const {
  // Calls AlignOutput on the stream if alignment was requested; logs and
  // reports false when that fails.
  const auto align_output = [&strm, &opts]() {
    if (!opts.align || AlignOutput(strm)) return true;
    LOG(ERROR) << "CompactArcStore::Write: Alignment failed: " << opts.source;
    return false;
  };

  if (states_) {
    if (!align_output()) return false;
    const auto states_bytes = (nstates_ + 1) * sizeof(Unsigned);
    strm.write(reinterpret_cast<const char *>(states_), states_bytes);
  }

  if (!align_output()) return false;
  const auto compacts_bytes = ncompacts_ * sizeof(Element);
  strm.write(reinterpret_cast<const char *>(compacts_), compacts_bytes);
  strm.flush();

  if (strm) return true;
  LOG(ERROR) << "CompactArcStore::Write: Write failed: " << opts.source;
  return false;
}
// Returns the name of this storage container type, "compact".
template <class Element, class Unsigned>
const std::string &CompactArcStore<Element, Unsigned>::Type() {
  // Allocated on first use and never deleted, so the returned reference
  // stays valid for the rest of the program.
  static const auto *const kStoreType = new std::string("compact");
  return *kStoreType;
}
// Forward declaration of the per-state accessor class. CompactArcCompactor
// names it as its State type and befriends it before the definitions appear.
template <class C, class U, class S>
class CompactArcState;
// Wraps an old-style arc compactor and a compact store as a new Fst compactor.
// The copy constructors of AC and S must make thread-safe copies and should
// be O(1).
template <class AC, class U,
          class S /*= CompactArcStore<typename AC::Element, U>*/>
class CompactArcCompactor {
 public:
  using ArcCompactor = AC;
  using Unsigned = U;
  using CompactStore = S;
  using Element = typename AC::Element;
  using Arc = typename AC::Arc;
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  using State = CompactArcState<AC, U, S>;
  friend State;

  // Creates a compactor with neither an arc compactor nor a compact store.
  CompactArcCompactor() : arc_compactor_(nullptr), compact_store_(nullptr) {}

  // Constructs from an Fst, taking the arc compactor by value.
  explicit CompactArcCompactor(const Fst<Arc> &fst,
                               ArcCompactor &&arc_compactor = ArcCompactor())
      : CompactArcCompactor(
            fst, std::make_shared<ArcCompactor>(std::move(arc_compactor))) {}

  // Constructs from an Fst, compacting it into a new store with the given
  // (shared) arc compactor.
  CompactArcCompactor(const Fst<Arc> &fst,
                      std::shared_ptr<ArcCompactor> arc_compactor)
      : arc_compactor_(std::move(arc_compactor)),
        compact_store_(std::make_shared<S>(fst, *arc_compactor_)) {}

  // Constructs from an Fst and an existing compactor. The arc compactor is
  // always shared with 'compactor'. Its store is shared too when it has one;
  // otherwise a new store is built from 'fst'.
  CompactArcCompactor(const Fst<Arc> &fst,
                      std::shared_ptr<CompactArcCompactor> compactor)
      : arc_compactor_(compactor->arc_compactor_),
        compact_store_(compactor->compact_store_ == nullptr
                           ? std::make_shared<S>(fst, *arc_compactor_)
                           : compactor->compact_store_) {}

  // Constructs from an existing arc compactor and compact store.
  CompactArcCompactor(std::shared_ptr<ArcCompactor> arc_compactor,
                      std::shared_ptr<CompactStore> compact_store)
      : arc_compactor_(std::move(arc_compactor)),
        compact_store_(std::move(compact_store)) {}

  // The next two constructors take a pair of iterators delimiting a sequence
  // of (already) compacted transitions, starting with the transitions leaving
  // the initial state. The expected layout depends on the arc compactor:
  //
  // - Fixed out-degree arc compactors: the final weight (encoded as a
  //   compacted transition) needs to be given only for final states. All
  //   strings (arc compactor of size 1) are assumed to be terminated by a
  //   final state even when that final state is not given explicitly.
  //
  // - Variable out-degree arc compactors: the final weight (encoded as a
  //   compacted transition) needs to be given for every state and must
  //   appear first (for state s: the final weight of s, followed by the
  //   outgoing transitions of s).
  //
  // These constructors allow a CompactArcFst to be built directly, without
  // first creating a more memory-hungry regular FST. This is useful when
  // memory usage is severely constrained.
  //
  // Usage:
  //   CompactArcFst<...> fst(
  //       std::make_shared<CompactArcFst<...>::Compactor>(b, e));
  template <class Iterator>
  CompactArcCompactor(const Iterator b, const Iterator e,
                      std::shared_ptr<ArcCompactor> arc_compactor)
      : arc_compactor_(std::move(arc_compactor)),
        compact_store_(std::make_shared<S>(b, e, *arc_compactor_)) {}

  template <class Iterator>
  CompactArcCompactor(const Iterator b, const Iterator e)
      : CompactArcCompactor(b, e, std::make_shared<ArcCompactor>()) {}

  // Copy constructor. Makes a thread-safe copy, which requires the
  // ArcCompactor and CompactStore copy constructors to do the same. A missing
  // arc compactor or store stays missing in the copy.
  CompactArcCompactor(const CompactArcCompactor &compactor)
      : arc_compactor_(
            CloneShared<ArcCompactor>(compactor.GetArcCompactor())),
        compact_store_(
            CloneShared<CompactStore>(compactor.GetCompactStore())) {}

  // Same as the copy constructor, but converts from a compactor that uses a
  // different arc compactor type.
  template <class OtherC>
  explicit CompactArcCompactor(
      const CompactArcCompactor<OtherC, U, S> &compactor)
      : arc_compactor_(
            CloneShared<ArcCompactor>(compactor.GetArcCompactor())),
        compact_store_(
            CloneShared<CompactStore>(compactor.GetCompactStore())) {}

  // Start state, state count and arc count, as reported by the store.
  StateId Start() const { return compact_store_->Start(); }
  StateId NumStates() const { return compact_store_->NumStates(); }
  size_t NumArcs() const { return compact_store_->NumArcs(); }

  // Points 'state' at state 's', unless it already refers to that state id.
  void SetState(StateId s, State *state) const {
    if (state->GetStateId() == s) return;
    state->Set(this, s);
  }

  // Reads the arc compactor and then the compact store from 'strm'. Returns
  // a newly allocated compactor owned by the caller, or nullptr if either
  // part fails to read.
  static CompactArcCompactor *Read(std::istream &strm,
                                   const FstReadOptions &opts,
                                   const FstHeader &hdr) {
    std::shared_ptr<ArcCompactor> arc_compactor(ArcCompactor::Read(strm));
    if (!arc_compactor) return nullptr;
    std::shared_ptr<S> compact_store(S::Read(strm, opts, hdr, *arc_compactor));
    if (!compact_store) return nullptr;
    return new CompactArcCompactor(std::move(arc_compactor),
                                   std::move(compact_store));
  }

  // Writes the arc compactor followed by the compact store. The store is not
  // written if the arc compactor fails to write.
  bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
    if (!arc_compactor_->Write(strm)) return false;
    return compact_store_->Write(strm, opts);
  }

  uint64_t Properties(uint64_t props) const {
    // The ArcCompactor's properties are simply or-ed in: the assumption is
    // that whenever the ArcCompactor sets a property, any FST with the
    // inverse property is incompatible with it.
    return props | arc_compactor_->Properties();
  }

  // Tests whether 'fst' can be compacted by the arc compactor.
  bool IsCompatible(const Fst<Arc> &fst) const {
    return arc_compactor_->Compatible(fst);
  }

  // True if the compact store reports an error.
  bool Error() const { return compact_store_->Error(); }

  // True if the arc compactor requires a fixed out-degree (Size() != -1).
  bool HasFixedOutdegree() const { return arc_compactor_->Size() != -1; }

  // Type name: "compact", then the bit width of U unless U has the size of
  // uint32_t, then "_" and the arc compactor type, then "_" and the store
  // type unless the store type is "compact".
  static const std::string &Type() {
    static const std::string *const type = [] {
      auto *name = new std::string("compact");
      if (sizeof(U) != sizeof(uint32_t)) {
        *name += std::to_string(8 * sizeof(U));
      }
      *name += "_";
      *name += ArcCompactor::Type();
      if (CompactStore::Type() != "compact") {
        *name += "_";
        *name += CompactStore::Type();
      }
      return name;
    }();
    return *type;
  }

  // Read-only, mutable and shared access to the two components.
  const ArcCompactor *GetArcCompactor() const { return arc_compactor_.get(); }
  const CompactStore *GetCompactStore() const { return compact_store_.get(); }
  ArcCompactor *MutableArcCompactor() { return arc_compactor_.get(); }
  CompactStore *MutableCompactStore() { return compact_store_.get(); }
  std::shared_ptr<ArcCompactor> SharedArcCompactor() { return arc_compactor_; }
  std::shared_ptr<CompactStore> SharedCompactStore() { return compact_store_; }

  // Expands the compacted element at position 'i' of the store into an arc
  // of state 's'.
  // TODO(allauzen): remove dependencies on this method and make private.
  Arc ComputeArc(StateId s, Unsigned i, uint8_t flags) const {
    return arc_compactor_->Expand(s, compact_store_->Compacts(i), flags);
  }

 private:
  // Returns a shared copy of '*source', or nullptr if 'source' is null.
  template <class To, class From>
  static std::shared_ptr<To> CloneShared(const From *source) {
    if (source == nullptr) return nullptr;
    return std::make_shared<To>(*source);
  }

  // Returns (first position, element count) of the compacted elements that
  // belong to state 's', superfinal transition included.
  std::pair<Unsigned, Unsigned> CompactsRange(StateId s) const {
    size_t first = 0;
    size_t count = 0;
    if (HasFixedOutdegree()) {
      first = s * arc_compactor_->Size();
      count = arc_compactor_->Size();
    } else {
      first = compact_store_->States(s);
      count = compact_store_->States(s + 1) - first;
    }
    return std::pair<size_t, size_t>(first, count);
  }

  // Declared in this order on purpose: several constructors initialize
  // compact_store_ from *arc_compactor_.
  std::shared_ptr<ArcCompactor> arc_compactor_;
  std::shared_ptr<CompactStore> compact_store_;
};
// Default implementation of state attributes accessor class for
// CompactArcCompactor. Use of efficient specialization strongly encouraged.
template <class ArcCompactor, class U, class S>
class CompactArcState {
 public:
  using Arc = typename ArcCompactor::Arc;
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  using Compactor = CompactArcCompactor<ArcCompactor, U, S>;

  // Accessor that refers to no state (kNoStateId).
  CompactArcState() = default;

  // Accessor for state 's' of 'compactor'.
  CompactArcState(const Compactor *compactor, StateId s) { Set(compactor, s); }

  // Re-targets this accessor at state 's' of 'compactor'.
  void Set(const Compactor *compactor, StateId s) {
    compactor_ = compactor;
    s_ = s;
    range_ = compactor->CompactsRange(s);
    // A leading element whose ilabel is kNoLabel is the superfinal transition
    // carrying the final weight.
    has_final_ =
        range_.second != 0 &&
        compactor->ComputeArc(s, range_.first, kArcILabelValue).ilabel ==
            kNoLabel;
    if (has_final_) {
      // Skip it so that the range covers the real arcs only.
      ++range_.first;
      --range_.second;
    }
  }

  StateId GetStateId() const { return s_; }

  // Final weight of the state; Weight::Zero() when it is not final.
  Weight Final() const {
    if (has_final_) {
      // The superfinal element sits just before the first arc.
      return compactor_->ComputeArc(s_, range_.first - 1, kArcWeightValue)
          .weight;
    }
    return Weight::Zero();
  }

  // Number of arcs, not counting the superfinal transition.
  size_t NumArcs() const { return range_.second; }

  // Gets the 'i'th arc of the state. Requires i < NumArcs().
  Arc GetArc(size_t i, uint8_t flags) const {
    return compactor_->ComputeArc(s_, range_.first + i, flags);
  }

 private:
  const Compactor *compactor_ = nullptr;  // Borrowed reference.
  StateId s_ = kNoStateId;
  // (position of the first arc in the store, number of arcs).
  std::pair<U, U> range_ = {0, 0};
  bool has_final_ = false;
};
// Specialization for CompactArcStore.
template <class ArcCompactor, class U>
class CompactArcState<ArcCompactor, U,
                      CompactArcStore<typename ArcCompactor::Element, U>> {
 public:
  using Arc = typename ArcCompactor::Arc;
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  using CompactStore = CompactArcStore<typename ArcCompactor::Element, U>;
  using Compactor = CompactArcCompactor<ArcCompactor, U, CompactStore>;

  // Accessor that refers to no state (kNoStateId).
  CompactArcState() = default;

  // Accessor for state 's' of 'compactor'.
  CompactArcState(const Compactor *compactor, StateId s)
      : arc_compactor_(compactor->GetArcCompactor()), s_(s) {
    Init(compactor);
  }

  // Re-targets this accessor at state 's' of 'compactor'.
  void Set(const Compactor *compactor, StateId s) {
    arc_compactor_ = compactor->GetArcCompactor();
    s_ = s;
    // Init() only ever sets this flag, so clear what the previous state left.
    has_final_ = false;
    Init(compactor);
  }

  StateId GetStateId() const { return s_; }

  // Final weight of the state; Weight::Zero() when it is not final.
  Weight Final() const {
    if (!has_final_) return Weight::Zero();
    // The superfinal element sits just before the first arc.
    return arc_compactor_->Expand(s_, *(compacts_ - 1), kArcWeightValue).weight;
  }

  // Number of arcs, not counting the superfinal transition.
  size_t NumArcs() const { return num_arcs_; }

  // Gets the 'i'th arc of the state. Requires i < NumArcs().
  Arc GetArc(size_t i, uint8_t flags) const {
    return arc_compactor_->Expand(s_, compacts_[i], flags);
  }

 private:
  // Locates the compacted elements of state 's_' in the store and separates
  // the superfinal transition, if present, from the arcs.
  void Init(const Compactor *compactor) {
    const auto *store = compactor->GetCompactStore();
    U offset;
    if (!compactor->HasFixedOutdegree()) {  // Variable out-degree compactor.
      offset = store->States(s_);
      num_arcs_ = store->States(s_ + 1) - offset;
    } else {  // Fixed out-degree compactor.
      offset = s_ * arc_compactor_->Size();
      num_arcs_ = arc_compactor_->Size();
    }
    if (num_arcs_ > 0) {
      compacts_ = &(store->Compacts(offset));
      // The superfinal transition is marked by ilabel == kNoLabel. A label
      // must be compared against the label sentinel, not the state-id
      // sentinel kNoStateId.
      if (arc_compactor_->Expand(s_, *compacts_, kArcILabelValue).ilabel ==
          kNoLabel) {
        ++compacts_;
        --num_arcs_;
        has_final_ = true;
      }
    }
  }

  const ArcCompactor *arc_compactor_ = nullptr;  // Borrowed reference.
  // First arc of the state inside the store's element array.
  const typename ArcCompactor::Element *compacts_ =
      nullptr;  // Borrowed reference.
  StateId s_ = kNoStateId;
  U num_arcs_ = 0;
  bool has_final_ = false;
};
// Forward declaration of Cast; no definition is given at this point in the
// file.
template <class F, class G>
void Cast(const F &, G *);
// Forward declaration of WriteCompactArcFst; no definition is given at this
// point in the file. Judging by the name and parameters it presumably writes
// 'fst' to 'strm' in the format of CompactArcFST using 'arc_compactor', and
// returns whether that succeeded -- confirm against the definition.
template <class CompactArcFST, class FST>
bool WriteCompactArcFst(
const FST &fst,
const typename CompactArcFST::Compactor::ArcCompactor &arc_compactor,
std::ostream &strm, const FstWriteOptions &opts);
namespace internal {
// Implementation class for CompactFst, which contains parametrizeable
// Fst data storage (CompactArcStore by default) and Fst cache.
// C's copy constructor must make a thread-safe copy.
template <class Arc, class C, class CacheStore = DefaultCacheStore<Arc>>
class CompactFstImpl
: public CacheBaseImpl<typename CacheStore::State, CacheStore> {
public:
using Weight = typename Arc::Weight;
using StateId = typename Arc::StateId;
using Compactor = C;
using FstImpl<Arc>::SetType;
using FstImpl<Arc>::SetProperties;
using FstImpl<Arc>::Properties;
using FstImpl<Arc>::SetInputSymbols;
using FstImpl<Arc>::SetOutputSymbols;
using FstImpl<Arc>::WriteHeader;
using ImplBase = CacheBaseImpl<typename CacheStore::State, CacheStore>;
using ImplBase::HasArcs;
using ImplBase::HasFinal;
using ImplBase::HasStart;
using ImplBase::PushArc;
using ImplBase::SetArcs;
using ImplBase::SetFinal;
using ImplBase::SetStart;
CompactFstImpl()
: ImplBase(CompactFstOptions()),
compactor_(std::make_shared<Compactor>()) {
SetType(Compactor::Type());
SetProperties(kNullProperties | kStaticProperties);
}
// Constructs a CompactFstImpl, creating a new Compactor using
// Compactor(fst, compactor); this uses the compactor arg only for optional
// information, such as compression level. See the Compactor interface
// description.
CompactFstImpl(const Fst<Arc> &fst, std::shared_ptr<Compactor> compactor,
const CompactFstOptions &opts)
: ImplBase(opts),
compactor_(std::make_shared<Compactor>(fst, std::move(compactor))) {
SetType(Compactor::Type());
SetInputSymbols(fst.InputSymbols());
SetOutputSymbols(fst.OutputSymbols());
if (compactor_->Error()) SetProperties(kError, kError);
uint64_t copy_properties =
fst.Properties(kMutable, false)
? fst.Properties(kCopyProperties, true)
: CheckProperties(
fst, kCopyProperties & ~kWeightedCycles & ~kUnweightedCycles,
kCopyProperties);
if ((copy_properties & kError) || !compactor_->IsCompatible(fst)) {
FSTERROR() << "CompactFstImpl: Input Fst incompatible with compactor";
SetProperties(kError, kError);
return;
}
SetProperties(compactor_->Properties(copy_properties) | kStaticProperties);
}
CompactFstImpl(std::shared_ptr<Compactor> compactor,
const CompactFstOptions &opts)
: ImplBase(opts), compactor_(std::move(compactor)) {
SetType(Compactor::Type());
SetProperties(kStaticProperties | compactor_->Properties(0));
if (compactor_->Error()) SetProperties(kError, kError);
}
// Makes a thread-safe copy; requires that Compactor's copy constructor
// does so as well.
CompactFstImpl(const CompactFstImpl &impl)
: ImplBase(impl),
compactor_(impl.compactor_ == nullptr
? std::make_shared<Compactor>()
: std::make_shared<Compactor>(*impl.compactor_)) {
SetType(impl.Type());
SetProperties(impl.Properties());
SetInputSymbols(impl.InputSymbols());
SetOutputSymbols(impl.OutputSymbols());
}
// Converting copy that changes the cache store from OtherCacheStore to
// CacheStore. Only the cache GC flag and limit are taken from the source's
// cache; the compactor is copy-constructed (or default-constructed when the
// source holds none), and type, properties and symbol tables are carried
// over.
//
// The source's compactor is read through the public GetCompactor()
// accessor: CompactFstImpl<Arc, Compactor, OtherCacheStore> is a distinct
// class whenever OtherCacheStore differs from CacheStore, so its private
// compactor_ member is not accessible from here.
template <class OtherCacheStore>
explicit CompactFstImpl(
    const CompactFstImpl<Arc, Compactor, OtherCacheStore> &impl)
    : ImplBase(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
      compactor_(impl.GetCompactor() == nullptr
                     ? std::make_shared<Compactor>()
                     : std::make_shared<Compactor>(*impl.GetCompactor())) {
  SetType(impl.Type());
  SetProperties(impl.Properties());
  SetInputSymbols(impl.InputSymbols());
  SetOutputSymbols(impl.OutputSymbols());
}
// Returns the start state, asking the compactor for it (and recording it via
// SetStart) the first time it is requested.
StateId Start() {
  if (HasStart()) return ImplBase::Start();
  SetStart(compactor_->Start());
  return ImplBase::Start();
}
// Returns the final weight of state `s`. If the base implementation has no
// final weight recorded for `s`, the weight is read from the compactor
// through the scratch state_ member; nothing is stored back.
Weight Final(StateId s) {
  if (!HasFinal(s)) {
    compactor_->SetState(s, &state_);
    return state_.Final();
  }
  return ImplBase::Final(s);
}
// Number of states as reported by the compactor; 0 when the kError property
// is set.
StateId NumStates() const {
  if (!Properties(kError)) return compactor_->NumStates();
  return 0;
}
// Number of arcs leaving state `s`. Uses the base implementation's count
// when HasArcs(s) holds; otherwise positions the scratch state_ on `s` and
// asks it.
size_t NumArcs(StateId s) {
  if (!HasArcs(s)) {
    compactor_->SetState(s, &state_);
    return state_.NumArcs();
  }
  return ImplBase::NumArcs(s);
}
// Number of input-epsilon arcs at state `s`. When the state's arcs are not
// already available and the FST is not known to be input-label sorted, the
// state is expanded first; with arcs available the base implementation's
// count is used, otherwise CountEpsilons scans the compact representation.
size_t NumInputEpsilons(StateId s) {
  const bool needs_expansion = !(HasArcs(s) || Properties(kILabelSorted));
  if (needs_expansion) Expand(s);
  if (!HasArcs(s)) return CountEpsilons(s, false);
  return ImplBase::NumInputEpsilons(s);
}
// Number of output-epsilon arcs at state `s`. When the state's arcs are not
// already available and the FST is not known to be output-label sorted, the
// state is expanded first; with arcs available the base implementation's
// count is used, otherwise CountEpsilons scans the compact representation.
size_t NumOutputEpsilons(StateId s) {
  const bool needs_expansion = !(HasArcs(s) || Properties(kOLabelSorted));
  if (needs_expansion) Expand(s);
  if (!HasArcs(s)) return CountEpsilons(s, true);
  return ImplBase::NumOutputEpsilons(s);
}
// Counts epsilon (label 0) arcs at state `s` directly from the compactor,
// looking at output labels when `output_epsilons` is true and input labels
// otherwise. The scan stops at the first strictly positive label; arcs with
// a negative label are skipped without being counted.
size_t CountEpsilons(StateId s, bool output_epsilons) {
  compactor_->SetState(s, &state_);
  // Only the label of interest needs to be materialized by GetArc.
  const uint8_t flags = output_epsilons ? kArcOLabelValue : kArcILabelValue;
  size_t epsilon_count = 0;
  const size_t arc_count = state_.NumArcs();
  for (size_t pos = 0; pos < arc_count; ++pos) {
    const auto &arc = state_.GetArc(pos, flags);
    const auto label = output_epsilons ? arc.olabel : arc.ilabel;
    if (label > 0) break;
    if (label == 0) ++epsilon_count;
  }
  return epsilon_count;
}
// Reads an implementation from `strm`. Returns a heap-allocated object owned
// by the caller, or nullptr if the header or the compactor cannot be read.
static CompactFstImpl *Read(std::istream &strm, const FstReadOptions &opts) {
  auto impl = std::make_unique<CompactFstImpl>();
  FstHeader hdr;
  const bool header_ok = impl->ReadHeader(strm, opts, kMinFileVersion, &hdr);
  if (!header_ok) return nullptr;
  // Ensures compatibility: files with the aligned version number are flagged
  // as aligned before the compactor reads them.
  if (hdr.Version() == kAlignedFileVersion) {
    hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
  }
  impl->compactor_ =
      std::shared_ptr<Compactor>(Compactor::Read(strm, opts, hdr));
  if (impl->compactor_) return impl.release();
  return nullptr;
}
// Writes the header followed by the compactor to `strm`. The result is
// whatever the compactor's Write reports.
bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
  // Ensures compatibility: aligned output uses its own file version.
  const int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
  FstHeader hdr;
  hdr.SetStart(compactor_->Start());
  hdr.SetNumStates(compactor_->NumStates());
  hdr.SetNumArcs(compactor_->NumArcs());
  WriteHeader(strm, opts, file_version, &hdr);
  const bool compactor_written = compactor_->Write(strm, opts);
  return compactor_written;
}
// Fills in the data for the generic state iterator: no iterator object is
// supplied (base is null), only the state count taken from the compactor.
void InitStateIterator(StateIteratorData<Arc> *data) const {
  data->nstates = compactor_->NumStates();
  data->base = nullptr;
}
// Prepares the generic arc iterator data for state `s`, expanding the state
// first when its arcs are not yet available.
void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) {
  const bool arcs_available = HasArcs(s);
  if (!arcs_available) Expand(s);
  ImplBase::InitArcIterator(s, data);
}
// Materializes state `s`: every arc obtained from the compactor is pushed
// with all arc fields requested, the arc list is finalized with SetArcs, and
// the final weight is recorded unless one is already present.
void Expand(StateId s) {
  compactor_->SetState(s, &state_);
  const size_t arc_count = state_.NumArcs();
  size_t pos = 0;
  while (pos < arc_count) {
    PushArc(s, state_.GetArc(pos, kArcValueFlags));
    ++pos;
  }
  SetArcs(s);
  if (HasFinal(s)) return;
  SetFinal(s, state_.Final());
}
// Read-only, non-owning access to the compactor.
const Compactor *GetCompactor() const {
  return compactor_.get();
}

// Mutable, non-owning access to the compactor.
Compactor *MutableCompactor() {
  return compactor_.get();
}

// Returns a shared_ptr copy, i.e. an additional owner of the compactor.
std::shared_ptr<Compactor> SharedCompactor() {
  return compactor_;
}
// Replaces the compactor held by this implementation. Only the pointer is
// swapped: type, properties and any cached state are left untouched here.
//
// TODO(allauzen): is this correct? is this needed?
// TODO(allauzen): consider removing and forcing this through direct calls
// to compactor.
void SetCompactor(std::shared_ptr<Compactor> compactor) {
  compactor_ = std::move(compactor);
}
// Properties always true of this FST class (currently just kExpanded). The
// constructors of this class OR this mask into the properties they set.
static constexpr uint64_t kStaticProperties = kExpanded;
protected:
// Converting constructor from an implementation with (possibly) different
// arc, compactor and cache-store types. The base class is default
// constructed; the compactor is built from the source's compactor, which
// requires Compactor to be constructible from OtherCompactor. Type,
// properties and symbol tables are carried over from the source.
//
// As in the copy constructors of this class, a source that holds no
// compactor yields a default-constructed Compactor instead of dereferencing
// a null pointer.
template <class OtherArc, class OtherCompactor, class OtherCacheStore>
explicit CompactFstImpl(
    const CompactFstImpl<OtherArc, OtherCompactor, OtherCacheStore> &impl)
    : compactor_(impl.GetCompactor() == nullptr
                     ? std::make_shared<Compactor>()
                     : std::make_shared<Compactor>(*impl.GetCompactor())) {
  SetType(impl.Type());
  SetProperties(impl.Properties());
  SetInputSymbols(impl.InputSymbols());
  SetOutputSymbols(impl.OutputSymbols());
}
private:
// Friend declaration so that WriteCompactArcFst can use the private
// k*Version constants declared below.
template <class CompactArcFST, class FST>
friend bool ::fst::WriteCompactArcFst(
const FST &fst,
const typename CompactArcFST::Compactor::ArcCompactor &arc_compactor,
std::ostream &strm, const FstWriteOptions &opts);
// Current unaligned file format version (written when opts.align is false).
static constexpr int kFileVersion = 2;
// Current aligned file format version (written when opts.align is true;
// Read() marks headers with this version as IS_ALIGNED).
static constexpr int kAlignedFileVersion = 1;
// Minimum file format version supported (passed to ReadHeader by Read()).
static constexpr int kMinFileVersion = 1;
// The compactor holding the compact representation; shared ownership.
std::shared_ptr<Compactor> compactor_;
// Scratch compactor state repositioned by compactor_->SetState() in the
// non-const accessors (Final, NumArcs, CountEpsilons, Expand).
typename Compactor::State state_;
};
// Typed accessor for the compactor of a CompactFst; intended to be called as
// GetCompactor<CompactorType>(fst). This overload is selected when `fst` is
// a CompactFst whose compactor is of the specified type and returns that
// compactor; for any other FST the generic overload below returns nullptr.
template <class Compactor, class Arc>
const Compactor *GetCompactor(const CompactFst<Arc, Compactor> &fst) {
  const Compactor *const compactor = fst.GetCompactor();
  return compactor;
}
// Generic overload of GetCompactor<CompactorType>(fst): for an arbitrary
// Fst there is no compactor of the requested type to hand out, so the
// result is always nullptr and `fst` is not inspected.
template <class Compactor, class Arc>
const Compactor *GetCompactor(const Fst<Arc> &fst) {
  const Compactor *const no_compactor = nullptr;
  return no_compactor;
}
} // namespace internal
// User-facing CompactFst. This class attaches the ExpandedFst interface to
// internal::CompactFstImpl and handles reference counting, delegating most
// methods to ImplToExpandedFst.
// (Template argument defaults are declared in fst-decl.h.)
template <class A, class C, class CacheStore>
class CompactFst
    : public ImplToExpandedFst<internal::CompactFstImpl<A, C, CacheStore>> {
 public:
  template <class F, class G>
  friend void Cast(const F &, G *);

  using Arc = A;
  using StateId = typename Arc::StateId;
  using Compactor = C;
  using Impl = internal::CompactFstImpl<Arc, Compactor, CacheStore>;
  using Store = CacheStore;  // for CacheArcIterator

  friend class StateIterator<CompactFst>;
  friend class ArcIterator<CompactFst>;

  // Creates a CompactFst around a default-constructed implementation.
  CompactFst() : ImplToExpandedFst<Impl>(std::make_shared<Impl>()) {}

  // Compacts `fst`, building the compactor argument from `fst` itself.
  explicit CompactFst(const Fst<Arc> &fst,
                      const CompactFstOptions &opts = CompactFstOptions())
      : CompactFst(fst, std::make_shared<Compactor>(fst), opts) {}

  // Constructs a CompactFst, creating a new Compactor using
  // Compactor(fst, compactor); this uses the compactor arg only for optional
  // information, such as compression level. See the Compactor interface
  // description.
  CompactFst(const Fst<Arc> &fst, std::shared_ptr<Compactor> compactor,
             const CompactFstOptions &opts = CompactFstOptions())
      : ImplToExpandedFst<Impl>(
            std::make_shared<Impl>(fst, std::move(compactor), opts)) {}

  // Convenience overload taking a Compactor rvalue reference, which spares
  // the call site a make_shared<Compactor>. The compactor is moved into a
  // shared_ptr and handed to the constructor above, so it too is used only
  // for optional information when the new Compactor(fst, compactor) is
  // created.
  CompactFst(const Fst<Arc> &fst, Compactor &&compactor,
             const CompactFstOptions &opts = CompactFstOptions())
      : CompactFst(fst, std::make_shared<Compactor>(std::move(compactor)),
                   opts) {}

  // Wraps an existing compactor without any source FST.
  explicit CompactFst(std::shared_ptr<Compactor> compactor,
                      const CompactFstOptions &opts = CompactFstOptions())
      : ImplToExpandedFst<Impl>(
            std::make_shared<Impl>(std::move(compactor), opts)) {}

  // See Fst<>::Copy() for doc.
  CompactFst(const CompactFst &fst, bool safe = false)
      : ImplToExpandedFst<Impl>(fst, safe) {}

  // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
  CompactFst *Copy(bool safe = false) const override {
    return new CompactFst(*this, safe);
  }

  // Read a CompactFst from an input stream; return nullptr on error.
  static CompactFst *Read(std::istream &strm, const FstReadOptions &opts) {
    auto *impl = Impl::Read(strm, opts);
    if (impl == nullptr) return nullptr;
    return new CompactFst(std::shared_ptr<Impl>(impl));
  }

  // Read a CompactFst from a file; return nullptr on error.
  // Empty source reads from standard input.
  static CompactFst *Read(std::string_view source) {
    auto *impl = ImplToExpandedFst<Impl>::Read(source);
    if (impl == nullptr) return nullptr;
    return new CompactFst(std::shared_ptr<Impl>(impl));
  }

  // Serializes to a stream by delegating to the implementation.
  bool Write(std::ostream &strm, const FstWriteOptions &opts) const override {
    return GetImpl()->Write(strm, opts);
  }

  // Serializes to the named file via Fst<Arc>::WriteFile.
  bool Write(const std::string &source) const override {
    return Fst<Arc>::WriteFile(source);
  }

  void InitStateIterator(StateIteratorData<Arc> *data) const override {
    GetImpl()->InitStateIterator(data);
  }

  // Goes through the mutable implementation because the implementation's
  // InitArcIterator is non-const (it may expand the state).
  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
    GetMutableImpl()->InitArcIterator(s, data);
  }

  // Returns a newly allocated SortedMatcher over this FST.
  MatcherBase<Arc> *InitMatcher(MatchType match_type) const override {
    return new SortedMatcher<CompactFst>(*this, match_type);
  }

  // Non-owning access to the implementation's compactor.
  const Compactor *GetCompactor() const { return GetImpl()->GetCompactor(); }

  // Replaces the implementation's compactor.
  void SetCompactor(std::shared_ptr<Compactor> compactor) {
    GetMutableImpl()->SetCompactor(std::move(compactor));
  }

 private:
  using ImplToFst<Impl, ExpandedFst<Arc>>::GetImpl;
  using ImplToFst<Impl, ExpandedFst<Arc>>::GetMutableImpl;

  // Adopts an already-built implementation; used by the Read functions.
  explicit CompactFst(std::shared_ptr<Impl> impl)
      : ImplToExpandedFst<Impl>(std::move(impl)) {}

  CompactFst &operator=(const CompactFst &fst) = delete;
};
// Writes FST in ArcCompacted format, with a possible pass over the machine
// before writing to compute the number of states and arcs.
//
// Layout written to `strm`, in order: the FST header, the arc compactor's
// own data, (only when the arc compactor's Size() is -1) a per-state index
// of cumulative element counts, and finally the compact elements themselves
// (one for each non-Zero final weight, followed by one per arc, state by
// state). When opts.align is set, the stream is aligned before the index
// and before the elements.
//
// Returns false if `fst` carries kError or is not compatible with
// `arc_compactor`, if any requested alignment fails, or if the stream is in
// a failed state after the final flush; returns true otherwise.
template <class CompactArcFST, class FST>
bool WriteCompactArcFst(
    const FST &fst,
    const typename CompactArcFST::Compactor::ArcCompactor &arc_compactor,
    std::ostream &strm, const FstWriteOptions &opts) {
  using Arc = typename CompactArcFST::Arc;
  using Compactor = typename CompactArcFST::Compactor;
  using ArcCompactor = typename Compactor::ArcCompactor;
  using CompactStore = typename Compactor::CompactStore;
  using Element = typename ArcCompactor::Element;
  using Impl = typename CompactArcFST::Impl;
  using Unsigned = typename Compactor::Unsigned;
  using Weight = typename Arc::Weight;
  const auto file_version =
      opts.align ? Impl::kAlignedFileVersion : Impl::kFileVersion;
  size_t num_arcs = -1;
  size_t num_states = -1;
  auto first_pass_arc_compactor = arc_compactor;
  // Note that GetCompactor will only return non-null if the compactor has the
  // exact type Compactor == CompactArcFST::Compactor. This is what we want;
  // other types must do an extra pass to set the arc compactor state.
  if (const Compactor *const compactor =
          internal::GetCompactor<Compactor>(fst)) {
    num_arcs = compactor->NumArcs();
    num_states = compactor->NumStates();
    first_pass_arc_compactor = *compactor->GetArcCompactor();
  } else {
    // A first pass is needed to compute the state of the compactor, which
    // is saved ahead of the rest of the data structures. This unfortunately
    // means forcing a complete double compaction when writing in this format.
    // TODO(allauzen): eliminate mutable state from compactors.
    num_arcs = 0;
    num_states = 0;
    for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
      const auto s = siter.Value();
      ++num_states;
      if (fst.Final(s) != Weight::Zero()) {
        first_pass_arc_compactor.Compact(
            s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
      }
      for (ArcIterator<FST> aiter(fst, s); !aiter.Done(); aiter.Next()) {
        ++num_arcs;
        first_pass_arc_compactor.Compact(s, aiter.Value());
      }
    }
  }
  FstHeader hdr;
  hdr.SetStart(fst.Start());
  hdr.SetNumStates(num_states);
  hdr.SetNumArcs(num_arcs);
  // The FST type string is "compact", then the bit width of Unsigned when it
  // is not 32 bits wide, then "_<arc compactor type>", then
  // "_<compact store type>" unless the store type is itself "compact".
  std::string type = "compact";
  if (sizeof(Unsigned) != sizeof(uint32_t)) {
    type += std::to_string(CHAR_BIT * sizeof(Unsigned));
  }
  type += "_";
  type += ArcCompactor::Type();
  if (CompactStore::Type() != "compact") {
    type += "_";
    type += CompactStore::Type();
  }
  const auto copy_properties = fst.Properties(kCopyProperties, true);
  if ((copy_properties & kError) || !arc_compactor.Compatible(fst)) {
    FSTERROR() << "Fst incompatible with compactor";
    return false;
  }
  uint64_t properties = copy_properties | Impl::kStaticProperties;
  internal::FstImpl<Arc>::WriteFstHeader(fst, strm, opts, file_version, type,
                                         properties, &hdr);
  first_pass_arc_compactor.Write(strm);
  if (first_pass_arc_compactor.Size() == -1) {
    // Size() == -1: write the per-state index. Each entry is the running
    // count of compact elements preceding that state; one extra trailing
    // entry holds the total.
    if (opts.align && !AlignOutput(strm)) {
      LOG(ERROR) << "WriteCompactArcFst: Alignment failed: " << opts.source;
      return false;
    }
    Unsigned compacts = 0;
    for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
      const auto s = siter.Value();
      strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
      if (fst.Final(s) != Weight::Zero()) {
        ++compacts;
      }
      compacts += fst.NumArcs(s);
    }
    strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
  }
  if (opts.align && !AlignOutput(strm)) {
    LOG(ERROR) << "Could not align file during write after writing states";
    // Stop here, exactly as for the alignment failure above: carrying on
    // would emit the elements unaligned in a file written with the aligned
    // file version.
    return false;
  }
  const auto &second_pass_arc_compactor = arc_compactor;
  Element element;
  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
    const auto s = siter.Value();
    if (fst.Final(s) != Weight::Zero()) {
      element = second_pass_arc_compactor.Compact(
          s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
      strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
    }
    for (ArcIterator<FST> aiter(fst, s); !aiter.Done(); aiter.Next()) {
      element = second_pass_arc_compactor.Compact(s, aiter.Value());
      strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
    }
  }
  strm.flush();
  if (!strm) {
    LOG(ERROR) << "WriteCompactArcFst: Write failed: " << opts.source;
    return false;
  }
  return true;
}
// Specialization for CompactFst; see generic version in fst.h for sample
// usage (but use the CompactFst type!). This version should inline.
//
// States are the integers 0 .. NumStates() - 1, so the iterator is a plain
// counter bounded by the state count captured at construction.
template <class Arc, class Compactor, class CacheStore>
class StateIterator<CompactFst<Arc, Compactor, CacheStore>> {
 public:
  using StateId = typename Arc::StateId;

  explicit StateIterator(const CompactFst<Arc, Compactor, CacheStore> &fst)
      : nstates_(fst.NumStates()) {}

  // True once the counter has moved past the last state.
  bool Done() const { return !(s_ < nstates_); }

  // Current state ID.
  StateId Value() const { return s_; }

  // Advances to the next state.
  void Next() { ++s_; }

  // Restarts iteration from state 0.
  void Reset() { s_ = 0; }

 private:
  StateId nstates_;
  StateId s_ = 0;
};
// Specialization for CompactFst. Never caches,
// always iterates over the underlying compact elements.
//
// Each Value() call expands the element at the current position into the
// arc_ member and returns a reference to it, so the reference is overwritten
// by the next Value() call.
template <class Arc, class Compactor, class CacheStore>
class ArcIterator<CompactFst<Arc, Compactor, CacheStore>> {
 public:
  using StateId = typename Arc::StateId;
  using State = typename Compactor::State;

  ArcIterator(const CompactFst<Arc, Compactor, CacheStore> &fst, StateId s)
      : state_(fst.GetMutableImpl()->MutableCompactor(), s),
        num_arcs_(state_.NumArcs()) {}

  // True once the position has moved past the last arc.
  bool Done() const { return !(pos_ < num_arcs_); }

  // Expands and returns the arc at the current position, materializing the
  // fields selected by the current flags.
  const Arc &Value() const {
    arc_ = state_.GetArc(pos_, flags_);
    return arc_;
  }

  void Next() { ++pos_; }

  size_t Position() const { return pos_; }

  void Reset() { pos_ = 0; }

  void Seek(size_t pos) { pos_ = pos; }

  uint8_t Flags() const { return flags_; }

  // Clears the bits selected by `mask`, then sets every bit of `flags` that
  // lies within kArcValueFlags.
  void SetFlags(uint8_t flags, uint8_t mask) {
    flags_ &= ~mask;
    flags_ |= (flags & kArcValueFlags);
  }

 private:
  // Declared first: num_arcs_ is initialized from it.
  State state_;
  size_t pos_ = 0;
  // Cache the value of NumArcs(), since it is used in Done() and may be slow.
  size_t num_arcs_;
  mutable Arc arc_;
  uint8_t flags_ = kArcValueFlags;
};
// ArcCompactor for unweighted string FSTs. Each compact element is a single
// label.
template <class A>
class StringCompactor {
 public:
  using Arc = A;
  using Label = typename Arc::Label;
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  using Element = Label;

  // Keeps only the input label of `arc`.
  Element Compact(StateId s, const Arc &arc) const { return arc.ilabel; }

  // Rebuilds an arc from label `p` at state `s`: both labels are `p`, the
  // weight is One(), and the destination is s + 1, except that an element
  // equal to kNoLabel gets kNoStateId as its destination. `flags` is unused.
  Arc Expand(StateId s, const Element &p,
             uint8_t flags = kArcValueFlags) const {
    const bool has_no_label = (p == kNoLabel);
    return Arc(p, p, Weight::One(), has_no_label ? kNoStateId : s + 1);
  }

  // Fixed size of 1 (WriteCompactArcFst writes a per-state index only for
  // compactors whose Size() is -1).
  constexpr ssize_t Size() const { return 1; }

  constexpr uint64_t Properties() const { return kCompiledStringProperties; }

  // True when `fst` has every property this compactor requires.
  bool Compatible(const Fst<Arc> &fst) const {
    const uint64_t required = Properties();
    const bool has_all = (fst.Properties(required, true) == required);
    return has_all;
  }

  // Type name; the string is allocated once and intentionally never freed.
  static const std::string &Type() {
    static const std::string *const type = new std::string("string");
    return *type;
  }

  // This compactor has no data of its own, so nothing is written.
  bool Write(std::ostream &strm) const { return true; }

  // Nothing is read from `strm`; returns a new compactor owned by the caller.
  static StringCompactor *Read(std::istream &strm) {
    return new StringCompactor;
  }
};
// ArcCompactor for weighted string FSTs. Each compact element is a
// (label, weight) pair.
template <class A>
class WeightedStringCompactor {
 public:
  using Arc = A;
  using Label = typename Arc::Label;
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  using Element = std::pair<Label, Weight>;

  // Keeps the input label and the weight of `arc`.
  Element Compact(StateId s, const Arc &arc) const {
    return Element(arc.ilabel, arc.weight);
  }

  // Rebuilds an arc at state `s`: both labels are p.first, the weight is
  // p.second, and the destination is s + 1, except that a label equal to
  // kNoLabel gets kNoStateId as its destination. `flags` is unused.
  Arc Expand(StateId s, const Element &p,
             uint8_t flags = kArcValueFlags) const {
    const bool has_no_label = (p.first == kNoLabel);
    return Arc(p.first, p.first, p.second,
               has_no_label ? kNoStateId : s + 1);
  }

  // Fixed size of 1 (WriteCompactArcFst writes a per-state index only for
  // compactors whose Size() is -1).
  constexpr ssize_t Size() const { return 1; }

  constexpr uint64_t Properties() const { return kString | kAcceptor; }

  // True when `fst` has every property this compactor requires.
  bool Compatible(const Fst<Arc> &fst) const {
    const uint64_t required = Properties();
    const bool has_all = (fst.Properties(required, true) == required);
    return has_all;
  }

  // Type name; the string is allocated once and intentionally never freed.
  static const std::string &Type() {
    static const std::string *const type = new std::string("weighted_string");
    return *type;
  }

  // This compactor has no data of its own, so nothing is written.
  bool Write(std::ostream &strm) const { return true; }

  // Nothing is read from `strm`; returns a new compactor owned by the caller.
  static WeightedStringCompactor *Read(std::istream &strm) {
    return new WeightedStringCompactor;
  }
};
// ArcCompactor for unweighted acceptor FSTs. Each compact element is a
// (label, next state) pair.
template <class A>
class UnweightedAcceptorCompactor {
 public:
  using Arc = A;
  using Label = typename Arc::Label;
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  using Element = std::pair<Label, StateId>;

  // Keeps the input label and the destination state of `arc`.
  Element Compact(StateId s, const Arc &arc) const {
    return Element(arc.ilabel, arc.nextstate);
  }

  // Rebuilds an arc: both labels are p.first, the weight is One(), and the
  // destination is p.second. `s` and `flags` are unused.
  Arc Expand(StateId s, const Element &p,
             uint8_t flags = kArcValueFlags) const {
    const Label label = p.first;
    const StateId nextstate = p.second;
    return Arc(label, label, Weight::One(), nextstate);
  }

  // Returns -1, the value for which WriteCompactArcFst writes a per-state
  // index.
  constexpr ssize_t Size() const { return -1; }

  constexpr uint64_t Properties() const { return kAcceptor | kUnweighted; }

  // True when `fst` has every property this compactor requires.
  bool Compatible(const Fst<Arc> &fst) const {
    const uint64_t required = Properties();
    const bool has_all = (fst.Properties(required, true) == required);
    return has_all;
  }

  // Type name; the string is allocated once and intentionally never freed.
  static const std::string &Type() {
    static const std::string *const type =
        new std::string("unweighted_acceptor");
    return *type;
  }

  // This compactor has no data of its own, so nothing is written.
  bool Write(std::ostream &strm) const { return true; }

  // Nothing is read from `istrm`; returns a new compactor owned by the
  // caller.
  static UnweightedAcceptorCompactor *Read(std::istream &istrm) {
    return new UnweightedAcceptorCompactor;
  }
};
// ArcCompactor for weighted acceptor FSTs. Each compact element is a
// ((label, weight), next state) pair.
template <class A>
class AcceptorCompactor {
 public:
  using Arc = A;
  using Label = typename Arc::Label;
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  using Element = std::pair<std::pair<Label, Weight>, StateId>;

  // Keeps the input label, the weight and the destination state of `arc`.
  Element Compact(StateId s, const Arc &arc) const {
    return Element(std::pair<Label, Weight>(arc.ilabel, arc.weight),
                   arc.nextstate);
  }

  // Rebuilds an arc: both labels are the stored label, with the stored
  // weight and destination. `s` and `flags` are unused.
  Arc Expand(StateId s, const Element &p,
             uint8_t flags = kArcValueFlags) const {
    const auto &label_and_weight = p.first;
    return Arc(label_and_weight.first, label_and_weight.first,
               label_and_weight.second, p.second);
  }

  // Returns -1, the value for which WriteCompactArcFst writes a per-state
  // index.
  constexpr ssize_t Size() const { return -1; }

  constexpr uint64_t Properties() const { return kAcceptor; }

  // True when `fst` has every property this compactor requires.
  bool Compatible(const Fst<Arc> &fst) const {
    const uint64_t required = Properties();
    const bool has_all = (fst.Properties(required, true) == required);
    return has_all;
  }

  // Type name; the string is allocated once and intentionally never freed.
  static const std::string &Type() {
    static const std::string *const type = new std::string("acceptor");
    return *type;
  }

  // This compactor has no data of its own, so nothing is written.
  bool Write(std::ostream &strm) const { return true; }

  // Nothing is read from `strm`; returns a new compactor owned by the caller.
  static AcceptorCompactor *Read(std::istream &strm) {
    return new AcceptorCompactor;
  }
};
// ArcCompactor for unweighted FSTs. Each compact element is a
// ((input label, output label), next state) pair.
template <class A>
class UnweightedCompactor {
 public:
  using Arc = A;
  using Label = typename Arc::Label;
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  using Element = std::pair<std::pair<Label, Label>, StateId>;

  // Keeps both labels and the destination state of `arc`.
  Element Compact(StateId s, const Arc &arc) const {
    return Element(std::pair<Label, Label>(arc.ilabel, arc.olabel),
                   arc.nextstate);
  }

  // Rebuilds an arc from the stored labels and destination with weight
  // One(). `s` and `flags` are unused.
  Arc Expand(StateId s, const Element &p,
             uint8_t flags = kArcValueFlags) const {
    const auto &labels = p.first;
    return Arc(labels.first, labels.second, Weight::One(), p.second);
  }

  // Returns -1, the value for which WriteCompactArcFst writes a per-state
  // index.
  constexpr ssize_t Size() const { return -1; }

  constexpr uint64_t Properties() const { return kUnweighted; }

  // True when `fst` has every property this compactor requires.
  bool Compatible(const Fst<Arc> &fst) const {
    const uint64_t required = Properties();
    const bool has_all = (fst.Properties(required, true) == required);
    return has_all;
  }

  // Type name; the string is allocated once and intentionally never freed.
  static const std::string &Type() {
    static const std::string *const type = new std::string("unweighted");
    return *type;
  }

  // This compactor has no data of its own, so nothing is written.
  bool Write(std::ostream &strm) const { return true; }

  // Nothing is read from `strm`; returns a new compactor owned by the caller.
  static UnweightedCompactor *Read(std::istream &strm) {
    return new UnweightedCompactor;
  }
};
// Alias templates pairing CompactArcFst with each of the arc compactors
// defined above. The commented-out "= uint32_t" records the default for
// Unsigned, which is not declared in these alias templates themselves.
template <class Arc, class Unsigned /* = uint32_t */>
using CompactStringFst = CompactArcFst<Arc, StringCompactor<Arc>, Unsigned>;
template <class Arc, class Unsigned /* = uint32_t */>
using CompactWeightedStringFst =
CompactArcFst<Arc, WeightedStringCompactor<Arc>, Unsigned>;
template <class Arc, class Unsigned /* = uint32_t */>
using CompactAcceptorFst = CompactArcFst<Arc, AcceptorCompactor<Arc>, Unsigned>;
template <class Arc, class Unsigned /* = uint32_t */>
using CompactUnweightedFst =
CompactArcFst<Arc, UnweightedCompactor<Arc>, Unsigned>;
template <class Arc, class Unsigned /* = uint32_t */>
using CompactUnweightedAcceptorFst =
CompactArcFst<Arc, UnweightedAcceptorCompactor<Arc>, Unsigned>;
// Concrete aliases for StdArc with a 32-bit Unsigned type.
using StdCompactStringFst = CompactStringFst<StdArc, uint32_t>;
using StdCompactWeightedStringFst = CompactWeightedStringFst<StdArc, uint32_t>;
using StdCompactAcceptorFst = CompactAcceptorFst<StdArc, uint32_t>;
using StdCompactUnweightedFst = CompactUnweightedFst<StdArc, uint32_t>;
using StdCompactUnweightedAcceptorFst =
CompactUnweightedAcceptorFst<StdArc, uint32_t>;
// Convenience function to make a CompactStringFst from a sequence
// of Arc::Labels. LabelIterator must be an input iterator.
// The [begin, end) range is handed to the compactor's constructor, and the
// resulting compactor is wrapped in the returned FST.
template <class Arc, class Unsigned = uint32_t, class LabelIterator>
inline CompactStringFst<Arc, Unsigned> MakeCompactStringFst(
    const LabelIterator begin, const LabelIterator end) {
  using ResultFst = CompactStringFst<Arc, Unsigned>;
  using ResultCompactor = typename ResultFst::Compactor;
  auto compactor = std::make_shared<ResultCompactor>(begin, end);
  return ResultFst(std::move(compactor));
}
// StdArc shorthand for MakeCompactStringFst: builds a StdCompactStringFst
// from the labels in [begin, end).
template <class LabelIterator>
inline StdCompactStringFst MakeStdCompactStringFst(const LabelIterator begin,
                                                   const LabelIterator end) {
  return MakeCompactStringFst<StdArc, uint32_t>(begin, end);
}
} // namespace fst
#endif // FST_COMPACT_FST_H_