|
// Copyright 2005-2024 Google LLC
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the 'License');
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an 'AS IS' BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
// See www.openfst.org for extensive documentation on this weighted
|
|
// finite-state transducer library.
|
|
//
|
|
// FST Class for memory-efficient representation of common types of
|
|
// FSTs: linear automata, acceptors, unweighted FSTs, ...
|
|
|
|
#ifndef FST_COMPACT_FST_H_
|
|
#define FST_COMPACT_FST_H_
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <climits>
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <ctime>
|
|
#include <istream>
|
|
#include <iterator>
|
|
#include <memory>
|
|
#include <ostream>
|
|
#include <string>
|
|
#include <tuple>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include <fst/log.h>
|
|
#include <fst/arc.h>
|
|
#include <fst/cache.h>
|
|
#include <fst/expanded-fst.h>
|
|
#include <fst/fst-decl.h> // For optional argument declarations
|
|
#include <fst/fst.h>
|
|
#include <fst/impl-to-fst.h>
|
|
#include <fst/mapped-file.h>
|
|
#include <fst/matcher.h>
|
|
#include <fst/properties.h>
|
|
#include <fst/test-properties.h>
|
|
#include <fst/util.h>
|
|
#include <string_view>
|
|
|
|
namespace fst {
|
|
|
|
struct CompactFstOptions : public CacheOptions {
|
|
// The default caching behaviour is to do no caching. Most compactors are
|
|
// cheap and therefore we save memory by not doing caching.
|
|
CompactFstOptions() : CacheOptions(true, 0) {}
|
|
|
|
explicit CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
|
|
};
|
|
|
|
// New (Fst) Compactor interface - used by CompactFst. This interface
|
|
// allows complete flexibility in how the compaction is accomplished.
|
|
//
|
|
// class Compactor {
|
|
// public:
|
|
// // Constructor from the Fst to be compacted. If compactor is present,
|
|
// // only optional state should be copied from it. Examples of this
|
|
// // optional state include compression level or ArcCompactors.
|
|
// explicit Compactor(const Fst<Arc> &fst,
|
|
// shared_ptr<Compactor> compactor = nullptr);
|
|
// // Copy constructor. Must make a thread-safe copy suitable for use by
|
|
// // Fst::Copy(/*safe=*/true). Only thread-unsafe data structures
|
|
// // need to be deeply copied. Ideally, this constructor is O(1) and any
|
|
// // large structures are thread-safe and shared, while small ones may
|
|
// // need to be copied.
|
|
// Compactor(const Compactor &compactor);
|
|
// // Default constructor (optional, see comment below).
|
|
// Compactor();
|
|
//
|
|
// // Returns the start state, number of states, and total number of arcs
|
|
// // of the compacted Fst
|
|
// StateId Start() const;
|
|
// StateId NumStates() const;
|
|
// size_t NumArcs() const;
|
|
//
|
|
// // Accessor class for state attributes.
|
|
// class State {
|
|
// public:
|
|
// State(); // Required, corresponds to kNoStateId.
|
|
// // This constructor may, of course, also take a const Compactor *
|
|
// // for the first argument. It is recommended to use const Compactor *
|
|
// // if possible, but this can be Compactor * if necessary.
|
|
// State(Compactor *c, StateId s); // Accessor for StateId 's'.
|
|
// StateId GetStateId() const;
|
|
// Weight Final() const;
|
|
// size_t NumArcs() const;
|
|
// // Gets the 'i'th arc for the state. Requires i < NumArcs().
|
|
// // Flags are a bitmask of the kArc*Value flags that ArcIterator uses.
|
|
// Arc GetArc(size_t i, uint8_t flags) const;
|
|
// };
|
|
//
|
|
// // Modifies 'state' accessor to provide access to state id 's'.
|
|
// void SetState(StateId s, State *state);
|
|
//
|
|
// // Tests whether 'fst' can be compacted by this compactor.
|
|
// template <typename A>
|
|
// bool IsCompatible(const Fst<A> &fst) const;
|
|
//
|
|
// // Returns the properties that are always true when an FST with the
|
|
// // specified properties is compacted using this compactor.
|
|
// // This function should clear bits for properties that no longer
|
|
// // hold and set those for properties that are known to hold.
|
|
// uint64_t Properties(uint64_t props) const;
|
|
//
|
|
// // Returns a string identifying the type of compactor.
|
|
// static const std::string &Type();
|
|
//
|
|
// // Returns true if an error has occurred.
|
|
// bool Error() const;
|
|
//
|
|
// // Writes a compactor to a file.
|
|
// bool Write(std::ostream &strm, const FstWriteOptions &opts) const;
|
|
//
|
|
// // Reads a compactor from a file.
|
|
// static Compactor *Read(std::istream &strm, const FstReadOptions &opts,
|
|
// const FstHeader &hdr);
|
|
// };
|
|
//
|
|
|
|
// Old ArcCompactor Interface:
|
|
//
|
|
// This interface is not deprecated; it, along with CompactArcStore and
|
|
// other Stores that implement its interface, is simply more constrained
|
|
// by essentially forcing the implementation to use an index array
|
|
// and an arc array, but giving flexibility in how those are implemented.
|
|
// This interface may still be useful and more convenient if that is the
|
|
// desired representation.
|
|
//
|
|
// The ArcCompactor class determines how arcs and final weights are compacted
|
|
// and expanded.
|
|
//
|
|
// Final weights are treated as transitions to the superfinal state, i.e.,
|
|
// ilabel = olabel = kNoLabel and nextstate = kNoStateId.
|
|
//
|
|
// There are two types of compactors:
|
|
//
|
|
// * Fixed out-degree compactors: 'compactor.Size()' returns a positive integer
|
|
// 's'. An FST can be compacted by this compactor only if each state has
|
|
// exactly 's' outgoing transitions (counting a non-Zero() final weight as a
|
|
// transition). A typical example is a compactor for string FSTs, i.e.,
|
|
// 's == 1'.
|
|
//
|
|
// * Variable out-degree compactors: 'compactor.Size() == -1'. There are no
|
|
// out-degree restrictions for these compactors.
|
|
//
|
|
// Interface:
|
|
//
|
|
// class ArcCompactor {
|
|
// public:
|
|
// // Default constructor (optional, see comment below).
|
|
// ArcCompactor();
|
|
//
|
|
// // Copy constructor. Must make a thread-safe copy suitable for use by
|
|
// // Fst::Copy(/*safe=*/true). Only thread-unsafe data structures
|
|
// // need to be deeply copied.
|
|
// ArcCompactor(const ArcCompactor &);
|
|
//
|
|
// // Element is the type of the compacted transitions.
|
|
// using Element = ...
|
|
//
|
|
// // Returns the compacted representation of a transition 'arc'
|
|
// // at a state 's'.
|
|
// Element Compact(StateId s, const Arc &arc);
|
|
//
|
|
// // Returns the transition at state 's' represented by the compacted
|
|
// // transition 'e'.
|
|
// Arc Expand(StateId s, const Element &e) const;
|
|
//
|
|
// // Returns -1 for variable out-degree compactors, and the mandatory
|
|
// // out-degree otherwise.
|
|
// ssize_t Size() const;
|
|
//
|
|
// // Tests whether an FST can be compacted by this compactor.
|
|
// bool Compatible(const Fst<A> &fst) const;
|
|
//
|
|
// // Returns the properties that are always true for an FST compacted using
|
|
// // this compactor. Any Fst with the inverse of these properties should
|
|
// // be incompatible.
|
|
// uint64_t Properties() const;
|
|
//
|
|
// // Returns a string identifying the type of compactor.
|
|
// static const std::string &Type();
|
|
//
|
|
// // Writes a compactor to a file.
|
|
// bool Write(std::ostream &strm) const;
|
|
//
|
|
// // Reads a compactor from a file.
|
|
// static ArcCompactor *Read(std::istream &strm);
|
|
// };
|
|
//
|
|
// The default constructor is only required for FST_REGISTER to work (i.e.,
|
|
// enabling Convert() and the command-line utilities to work with this new
|
|
// compactor). However, a default constructor always needs to be specified for
|
|
// this code to compile, but one can have it simply raise an error when called,
|
|
// like so:
|
|
//
|
|
// Compactor::Compactor() {
|
|
// FSTERROR() << "Compactor: No default constructor";
|
|
// }
|
|
|
|
// Default implementation data for CompactArcCompactor. Only old-style
|
|
// ArcCompactors are supported because the CompactArcStore constructors
|
|
// use the old API.
|
|
//
|
|
// CompactArcStore is thread-compatible, but not thread-safe.
|
|
// The copy constructor makes a thread-safe copy.
|
|
//
|
|
// The implementation contains two arrays: 'states_' and 'compacts_'.
|
|
//
|
|
// For fixed out-degree compactors, the 'states_' array is unallocated. The
|
|
// 'compacts_' array contains the compacted transitions. Its size is
|
|
// 'ncompacts_'. The outgoing transitions at a given state are stored
|
|
// consecutively. For a given state 's', its 'compactor.Size()' outgoing
|
|
// transitions (including a superfinal transition when 's' is final), are stored
|
|
// in positions ['s*compactor.Size()', '(s+1)*compactor.Size()').
|
|
//
|
|
// For variable out-degree compactors, the states_ array has size
|
|
// 'nstates_ + 1' and contains positions in the 'compacts_' array. For a
|
|
// given state 's', the compacted transitions of 's' are stored in positions
|
|
// ['states_[s]', 'states_[s + 1]') in 'compacts_'. By convention,
|
|
// 'states_[nstates_] == ncompacts_'.
|
|
//
|
|
// In both cases, the superfinal transitions (when 's' is final, i.e.,
|
|
// 'Final(s) != Weight::Zero()') are stored first.
|
|
//
|
|
// The unsigned type 'Unsigned' is used to represent indices into the
// 'compacts_' array.
|
|
template <class Element, class Unsigned>
|
|
class CompactArcStore {
|
|
public:
|
|
CompactArcStore() = default;
|
|
|
|
// Makes a thread-safe copy. O(1).
|
|
CompactArcStore(const CompactArcStore &) = default;
|
|
|
|
template <class Arc, class ArcCompactor>
|
|
CompactArcStore(const Fst<Arc> &fst, const ArcCompactor &arc_compactor);
|
|
|
|
template <class Iterator, class ArcCompactor>
|
|
CompactArcStore(const Iterator begin, const Iterator end,
|
|
const ArcCompactor &arc_compactor);
|
|
|
|
~CompactArcStore() = default;
|
|
|
|
template <class ArcCompactor>
|
|
static CompactArcStore *Read(std::istream &strm, const FstReadOptions &opts,
|
|
const FstHeader &hdr,
|
|
const ArcCompactor &arc_compactor);
|
|
|
|
bool Write(std::ostream &strm, const FstWriteOptions &opts) const;
|
|
|
|
// Returns the starting index in 'compacts_' of the transitions
|
|
// for state 'i'. See class-level comment for further details.
|
|
// Requires that the CompactArcStore was constructed with a
|
|
// variable out-degree compactor. Requires 0 <= i <= NumStates().
|
|
// By convention, States(NumStates()) == NumCompacts().
|
|
Unsigned States(ssize_t i) const { return states_[i]; }
|
|
|
|
// Returns the compacted Element at position i. See class-level comment
|
|
// for further details. Requires 0 <= i < NumCompacts().
|
|
const Element &Compacts(size_t i) const { return compacts_[i]; }
|
|
|
|
size_t NumStates() const { return nstates_; }
|
|
|
|
size_t NumCompacts() const { return ncompacts_; }
|
|
|
|
size_t NumArcs() const { return narcs_; }
|
|
|
|
ssize_t Start() const { return start_; }
|
|
|
|
bool Error() const { return error_; }
|
|
|
|
// Returns a string identifying the type of data storage container.
|
|
static const std::string &Type();
|
|
|
|
private:
|
|
std::shared_ptr<MappedFile> states_region_;
|
|
std::shared_ptr<MappedFile> compacts_region_;
|
|
// Unowned pointer into states_region_.
|
|
Unsigned *states_ = nullptr;
|
|
// Unowned pointer into compacts_region_.
|
|
Element *compacts_ = nullptr;
|
|
size_t nstates_ = 0;
|
|
size_t ncompacts_ = 0;
|
|
size_t narcs_ = 0;
|
|
ssize_t start_ = kNoStateId;
|
|
bool error_ = false;
|
|
};
|
|
|
|
template <class Element, class Unsigned>
|
|
template <class Arc, class ArcCompactor>
|
|
CompactArcStore<Element, Unsigned>::CompactArcStore(
|
|
const Fst<Arc> &fst, const ArcCompactor &arc_compactor) {
|
|
using StateId = typename Arc::StateId;
|
|
using Weight = typename Arc::Weight;
|
|
start_ = fst.Start();
|
|
// Counts # of states and arcs.
|
|
StateId nfinals = 0;
|
|
for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
|
|
++nstates_;
|
|
const auto s = siter.Value();
|
|
narcs_ += fst.NumArcs(s);
|
|
if (fst.Final(s) != Weight::Zero()) ++nfinals;
|
|
}
|
|
if (arc_compactor.Size() == -1) {
|
|
states_region_ = fst::WrapUnique(MappedFile::Allocate(
|
|
sizeof(states_[0]) * (nstates_ + 1), alignof(decltype(states_[0]))));
|
|
states_ = static_cast<Unsigned *>(states_region_->mutable_data());
|
|
ncompacts_ = narcs_ + nfinals;
|
|
compacts_region_ = fst::WrapUnique(MappedFile::Allocate(
|
|
sizeof(compacts_[0]) * ncompacts_, alignof(decltype(compacts_[0]))));
|
|
compacts_ = static_cast<Element *>(compacts_region_->mutable_data());
|
|
states_[nstates_] = ncompacts_;
|
|
} else {
|
|
states_ = nullptr;
|
|
ncompacts_ = nstates_ * arc_compactor.Size();
|
|
if ((narcs_ + nfinals) != ncompacts_) {
|
|
FSTERROR() << "CompactArcStore: ArcCompactor incompatible with FST";
|
|
error_ = true;
|
|
return;
|
|
}
|
|
compacts_region_ = fst::WrapUnique(MappedFile::Allocate(
|
|
sizeof(compacts_[0]) * ncompacts_, alignof(decltype(compacts_[0]))));
|
|
compacts_ = static_cast<Element *>(compacts_region_->mutable_data());
|
|
}
|
|
size_t pos = 0;
|
|
size_t fpos = 0;
|
|
for (size_t s = 0; s < nstates_; ++s) {
|
|
fpos = pos;
|
|
if (arc_compactor.Size() == -1) states_[s] = pos;
|
|
if (fst.Final(s) != Weight::Zero()) {
|
|
compacts_[pos++] = arc_compactor.Compact(
|
|
s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
|
|
}
|
|
for (ArcIterator<Fst<Arc>> aiter(fst, s); !aiter.Done(); aiter.Next()) {
|
|
compacts_[pos++] = arc_compactor.Compact(s, aiter.Value());
|
|
}
|
|
if ((arc_compactor.Size() != -1) && (pos != fpos + arc_compactor.Size())) {
|
|
FSTERROR() << "CompactArcStore: ArcCompactor incompatible with FST";
|
|
error_ = true;
|
|
return;
|
|
}
|
|
}
|
|
if (pos != ncompacts_) {
|
|
FSTERROR() << "CompactArcStore: ArcCompactor incompatible with FST";
|
|
error_ = true;
|
|
return;
|
|
}
|
|
}
|
|
|
|
template <class Element, class Unsigned>
|
|
template <class Iterator, class ArcCompactor>
|
|
CompactArcStore<Element, Unsigned>::CompactArcStore(
|
|
const Iterator begin, const Iterator end,
|
|
const ArcCompactor &arc_compactor) {
|
|
using Arc = typename ArcCompactor::Arc;
|
|
using Weight = typename Arc::Weight;
|
|
if (arc_compactor.Size() != -1) {
|
|
ncompacts_ = std::distance(begin, end);
|
|
if (arc_compactor.Size() == 1) {
|
|
// For strings, allows implicit final weight. Empty input is the empty
|
|
// string.
|
|
if (ncompacts_ == 0) {
|
|
++ncompacts_;
|
|
} else {
|
|
const auto arc =
|
|
arc_compactor.Expand(ncompacts_ - 1, *(begin + (ncompacts_ - 1)));
|
|
if (arc.ilabel != kNoLabel) ++ncompacts_;
|
|
}
|
|
}
|
|
if (ncompacts_ % arc_compactor.Size()) {
|
|
FSTERROR() << "CompactArcStore: Size of input container incompatible"
|
|
<< " with arc compactor";
|
|
error_ = true;
|
|
return;
|
|
}
|
|
if (ncompacts_ == 0) return;
|
|
start_ = 0;
|
|
nstates_ = ncompacts_ / arc_compactor.Size();
|
|
compacts_region_ = fst::WrapUnique(MappedFile::Allocate(
|
|
sizeof(compacts_[0]) * ncompacts_, alignof(decltype(compacts_[0]))));
|
|
compacts_ = static_cast<Element *>(compacts_region_->mutable_data());
|
|
size_t i = 0;
|
|
Iterator it = begin;
|
|
for (; it != end; ++it, ++i) {
|
|
compacts_[i] = *it;
|
|
if (arc_compactor.Expand(i, *it).ilabel != kNoLabel) ++narcs_;
|
|
}
|
|
if (i < ncompacts_) {
|
|
compacts_[i] = arc_compactor.Compact(
|
|
i, Arc(kNoLabel, kNoLabel, Weight::One(), kNoStateId));
|
|
}
|
|
} else {
|
|
if (std::distance(begin, end) == 0) return;
|
|
// Count # of states, arcs and compacts.
|
|
auto it = begin;
|
|
for (size_t i = 0; it != end; ++it, ++i) {
|
|
const auto arc = arc_compactor.Expand(i, *it);
|
|
if (arc.ilabel != kNoLabel) {
|
|
++narcs_;
|
|
++ncompacts_;
|
|
} else {
|
|
++nstates_;
|
|
if (arc.weight != Weight::Zero()) ++ncompacts_;
|
|
}
|
|
}
|
|
start_ = 0;
|
|
compacts_region_ = fst::WrapUnique(MappedFile::Allocate(
|
|
sizeof(compacts_[0]) * ncompacts_, alignof(decltype(compacts_[0]))));
|
|
compacts_ = static_cast<Element *>(compacts_region_->mutable_data());
|
|
states_region_ = fst::WrapUnique(MappedFile::Allocate(
|
|
sizeof(states_[0]) * (nstates_ + 1), alignof(decltype(states_[0]))));
|
|
states_ = static_cast<Unsigned *>(states_region_->mutable_data());
|
|
states_[nstates_] = ncompacts_;
|
|
size_t i = 0;
|
|
size_t s = 0;
|
|
for (it = begin; it != end; ++it) {
|
|
const auto arc = arc_compactor.Expand(i, *it);
|
|
if (arc.ilabel != kNoLabel) {
|
|
compacts_[i++] = *it;
|
|
} else {
|
|
states_[s++] = i;
|
|
if (arc.weight != Weight::Zero()) compacts_[i++] = *it;
|
|
}
|
|
}
|
|
if ((s != nstates_) || (i != ncompacts_)) {
|
|
FSTERROR() << "CompactArcStore: Ill-formed input container";
|
|
error_ = true;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
template <class Element, class Unsigned>
|
|
template <class ArcCompactor>
|
|
CompactArcStore<Element, Unsigned> *CompactArcStore<Element, Unsigned>::Read(
|
|
std::istream &strm, const FstReadOptions &opts, const FstHeader &hdr,
|
|
const ArcCompactor &arc_compactor) {
|
|
auto data = std::make_unique<CompactArcStore>();
|
|
data->start_ = hdr.Start();
|
|
data->nstates_ = hdr.NumStates();
|
|
data->narcs_ = hdr.NumArcs();
|
|
if (arc_compactor.Size() == -1) {
|
|
if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
|
|
LOG(ERROR) << "CompactArcStore::Read: Alignment failed: " << opts.source;
|
|
return nullptr;
|
|
}
|
|
auto b = (data->nstates_ + 1) * sizeof(Unsigned);
|
|
data->states_region_.reset(MappedFile::Map(
|
|
strm, opts.mode == FstReadOptions::MAP, opts.source, b));
|
|
if (!strm || !data->states_region_) {
|
|
LOG(ERROR) << "CompactArcStore::Read: Read failed: " << opts.source;
|
|
return nullptr;
|
|
}
|
|
data->states_ =
|
|
static_cast<Unsigned *>(data->states_region_->mutable_data());
|
|
} else {
|
|
data->states_ = nullptr;
|
|
}
|
|
data->ncompacts_ = arc_compactor.Size() == -1
|
|
? data->states_[data->nstates_]
|
|
: data->nstates_ * arc_compactor.Size();
|
|
if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
|
|
LOG(ERROR) << "CompactArcStore::Read: Alignment failed: " << opts.source;
|
|
return nullptr;
|
|
}
|
|
size_t b = data->ncompacts_ * sizeof(Element);
|
|
data->compacts_region_.reset(
|
|
MappedFile::Map(strm, opts.mode == FstReadOptions::MAP, opts.source, b));
|
|
if (!strm || !data->compacts_region_) {
|
|
LOG(ERROR) << "CompactArcStore::Read: Read failed: " << opts.source;
|
|
return nullptr;
|
|
}
|
|
data->compacts_ =
|
|
static_cast<Element *>(data->compacts_region_->mutable_data());
|
|
return data.release();
|
|
}
|
|
|
|
template <class Element, class Unsigned>
|
|
bool CompactArcStore<Element, Unsigned>::Write(
|
|
std::ostream &strm, const FstWriteOptions &opts) const {
|
|
if (states_) {
|
|
if (opts.align && !AlignOutput(strm)) {
|
|
LOG(ERROR) << "CompactArcStore::Write: Alignment failed: " << opts.source;
|
|
return false;
|
|
}
|
|
strm.write(reinterpret_cast<const char *>(states_),
|
|
(nstates_ + 1) * sizeof(Unsigned));
|
|
}
|
|
if (opts.align && !AlignOutput(strm)) {
|
|
LOG(ERROR) << "CompactArcStore::Write: Alignment failed: " << opts.source;
|
|
return false;
|
|
}
|
|
strm.write(reinterpret_cast<const char *>(compacts_),
|
|
ncompacts_ * sizeof(Element));
|
|
strm.flush();
|
|
if (!strm) {
|
|
LOG(ERROR) << "CompactArcStore::Write: Write failed: " << opts.source;
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
template <class Element, class Unsigned>
|
|
const std::string &CompactArcStore<Element, Unsigned>::Type() {
|
|
static const std::string *const type = new std::string("compact");
|
|
return *type;
|
|
}
|
|
|
|
// Forward declaration of the per-state accessor, so CompactArcCompactor can
// name it before it is defined.
template <typename C, typename U, typename S>
class CompactArcState;
|
|
|
|
// Wraps an old-style arc compactor and a compact store as a new Fst compactor.
|
|
// The copy constructors of AC and S must make thread-safe copies and should
|
|
// be O(1).
|
|
template <class AC, class U,
|
|
class S /*= CompactArcStore<typename AC::Element, U>*/>
|
|
class CompactArcCompactor {
|
|
public:
|
|
using ArcCompactor = AC;
|
|
using Unsigned = U;
|
|
using CompactStore = S;
|
|
using Element = typename AC::Element;
|
|
using Arc = typename AC::Arc;
|
|
using StateId = typename Arc::StateId;
|
|
using Weight = typename Arc::Weight;
|
|
using State = CompactArcState<AC, U, S>;
|
|
friend State;
|
|
|
|
CompactArcCompactor() : arc_compactor_(nullptr), compact_store_(nullptr) {}
|
|
|
|
// Constructs from Fst.
|
|
explicit CompactArcCompactor(const Fst<Arc> &fst,
|
|
ArcCompactor &&arc_compactor = ArcCompactor())
|
|
: CompactArcCompactor(
|
|
fst, std::make_shared<ArcCompactor>(std::move(arc_compactor))) {}
|
|
|
|
CompactArcCompactor(const Fst<Arc> &fst,
|
|
std::shared_ptr<ArcCompactor> arc_compactor)
|
|
: arc_compactor_(std::move(arc_compactor)),
|
|
compact_store_(std::make_shared<S>(fst, *arc_compactor_)) {}
|
|
|
|
CompactArcCompactor(const Fst<Arc> &fst,
|
|
std::shared_ptr<CompactArcCompactor> compactor)
|
|
: arc_compactor_(compactor->arc_compactor_),
|
|
compact_store_(compactor->compact_store_ == nullptr
|
|
? std::make_shared<S>(fst, *arc_compactor_)
|
|
: compactor->compact_store_) {}
|
|
|
|
// Constructs from CompactStore.
|
|
CompactArcCompactor(std::shared_ptr<ArcCompactor> arc_compactor,
|
|
std::shared_ptr<CompactStore> compact_store)
|
|
: arc_compactor_(std::move(arc_compactor)),
|
|
compact_store_(std::move(compact_store)) {}
|
|
|
|
// The following 2 constructors take as input two iterators delimiting a set
|
|
// of (already) compacted transitions, starting with the transitions out of
|
|
// the initial state. The format of the input differs for fixed out-degree
|
|
// and variable out-degree arc compactors.
|
|
//
|
|
// - For fixed out-degree arc compactors, the final weight (encoded as a
|
|
// compacted transition) needs to be given only for final states. All strings
|
|
// (arc compactor of size 1) will be assume to be terminated by a final state
|
|
// even when the final state is not implicitely given.
|
|
//
|
|
// - For variable out-degree arc compactors, the final weight (encoded as a
|
|
// compacted transition) needs to be given for all states and must appeared
|
|
// first in the list (for state s, final weight of s, followed by outgoing
|
|
// transitons in s).
|
|
//
|
|
// These 2 constructors allows the direct construction of a CompactArcFst
|
|
// without first creating a more memory-hungry regular FST. This is useful
|
|
// when memory usage is severely constrained.
|
|
//
|
|
// Usage:
|
|
// CompactArcFst<...> fst(
|
|
// std::make_shared<CompactArcFst<...>::Compactor>(b, e));
|
|
template <class Iterator>
|
|
CompactArcCompactor(const Iterator b, const Iterator e,
|
|
std::shared_ptr<ArcCompactor> arc_compactor)
|
|
: arc_compactor_(std::move(arc_compactor)),
|
|
compact_store_(std::make_shared<S>(b, e, *arc_compactor_)) {}
|
|
|
|
template <class Iterator>
|
|
CompactArcCompactor(const Iterator b, const Iterator e)
|
|
: CompactArcCompactor(b, e, std::make_shared<ArcCompactor>()) {}
|
|
|
|
// Copy constructor. This makes a thread-safe copy, so requires that
|
|
// The ArcCompactor and CompactStore copy constructors make thread-safe
|
|
// copies.
|
|
CompactArcCompactor(const CompactArcCompactor &compactor)
|
|
: arc_compactor_(
|
|
compactor.GetArcCompactor() == nullptr
|
|
? nullptr
|
|
: std::make_shared<ArcCompactor>(*compactor.GetArcCompactor())),
|
|
compact_store_(compactor.GetCompactStore() == nullptr
|
|
? nullptr
|
|
: std::make_shared<CompactStore>(
|
|
*compactor.GetCompactStore())) {}
|
|
|
|
template <class OtherC>
|
|
explicit CompactArcCompactor(
|
|
const CompactArcCompactor<OtherC, U, S> &compactor)
|
|
: arc_compactor_(
|
|
compactor.GetArcCompactor() == nullptr
|
|
? nullptr
|
|
: std::make_shared<ArcCompactor>(*compactor.GetArcCompactor())),
|
|
compact_store_(compactor.GetCompactStore() == nullptr
|
|
? nullptr
|
|
: std::make_shared<CompactStore>(
|
|
*compactor.GetCompactStore())) {}
|
|
|
|
StateId Start() const { return compact_store_->Start(); }
|
|
StateId NumStates() const { return compact_store_->NumStates(); }
|
|
size_t NumArcs() const { return compact_store_->NumArcs(); }
|
|
|
|
void SetState(StateId s, State *state) const {
|
|
if (state->GetStateId() != s) state->Set(this, s);
|
|
}
|
|
|
|
static CompactArcCompactor *Read(std::istream &strm,
|
|
const FstReadOptions &opts,
|
|
const FstHeader &hdr) {
|
|
std::shared_ptr<ArcCompactor> arc_compactor(ArcCompactor::Read(strm));
|
|
if (arc_compactor == nullptr) return nullptr;
|
|
std::shared_ptr<S> compact_store(S::Read(strm, opts, hdr, *arc_compactor));
|
|
if (compact_store == nullptr) return nullptr;
|
|
return new CompactArcCompactor(arc_compactor, compact_store);
|
|
}
|
|
|
|
bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
|
|
return arc_compactor_->Write(strm) && compact_store_->Write(strm, opts);
|
|
}
|
|
|
|
uint64_t Properties(uint64_t props) const {
|
|
// ArcCompactor properties can just be or-ed in since it is assumed that
|
|
// if the ArcCompactor sets a property, any FST with the inverse
|
|
// property is incompatible.
|
|
return arc_compactor_->Properties() | props;
|
|
}
|
|
|
|
bool IsCompatible(const Fst<Arc> &fst) const {
|
|
return arc_compactor_->Compatible(fst);
|
|
}
|
|
|
|
bool Error() const { return compact_store_->Error(); }
|
|
|
|
bool HasFixedOutdegree() const { return arc_compactor_->Size() != -1; }
|
|
|
|
static const std::string &Type() {
|
|
static const std::string *const type = [] {
|
|
std::string type = "compact";
|
|
if (sizeof(U) != sizeof(uint32_t)) type += std::to_string(8 * sizeof(U));
|
|
type += "_";
|
|
type += ArcCompactor::Type();
|
|
if (CompactStore::Type() != "compact") {
|
|
type += "_";
|
|
type += CompactStore::Type();
|
|
}
|
|
return new std::string(type);
|
|
}();
|
|
return *type;
|
|
}
|
|
|
|
const ArcCompactor *GetArcCompactor() const { return arc_compactor_.get(); }
|
|
const CompactStore *GetCompactStore() const { return compact_store_.get(); }
|
|
|
|
ArcCompactor *MutableArcCompactor() { return arc_compactor_.get(); }
|
|
CompactStore *MutableCompactStore() { return compact_store_.get(); }
|
|
|
|
std::shared_ptr<ArcCompactor> SharedArcCompactor() { return arc_compactor_; }
|
|
std::shared_ptr<CompactStore> SharedCompactStore() { return compact_store_; }
|
|
|
|
// TODO(allauzen): remove dependencies on this method and make private.
|
|
Arc ComputeArc(StateId s, Unsigned i, uint8_t flags) const {
|
|
return arc_compactor_->Expand(s, compact_store_->Compacts(i), flags);
|
|
}
|
|
|
|
private:
|
|
std::pair<Unsigned, Unsigned> CompactsRange(StateId s) const {
|
|
std::pair<size_t, size_t> range;
|
|
if (HasFixedOutdegree()) {
|
|
range.first = s * arc_compactor_->Size();
|
|
range.second = arc_compactor_->Size();
|
|
} else {
|
|
range.first = compact_store_->States(s);
|
|
range.second = compact_store_->States(s + 1) - range.first;
|
|
}
|
|
return range;
|
|
}
|
|
|
|
private:
|
|
std::shared_ptr<ArcCompactor> arc_compactor_;
|
|
std::shared_ptr<CompactStore> compact_store_;
|
|
};
|
|
|
|
// Default implementation of state attributes accessor class for
|
|
// CompactArcCompactor. Use of efficient specialization strongly encouraged.
|
|
template <class ArcCompactor, class U, class S>
|
|
class CompactArcState {
|
|
public:
|
|
using Arc = typename ArcCompactor::Arc;
|
|
using StateId = typename Arc::StateId;
|
|
using Weight = typename Arc::Weight;
|
|
using Compactor = CompactArcCompactor<ArcCompactor, U, S>;
|
|
|
|
CompactArcState() = default;
|
|
|
|
CompactArcState(const Compactor *compactor, StateId s)
|
|
: compactor_(compactor),
|
|
s_(s),
|
|
range_(compactor->CompactsRange(s)),
|
|
has_final_(
|
|
range_.second != 0 &&
|
|
compactor->ComputeArc(s, range_.first, kArcILabelValue).ilabel ==
|
|
kNoLabel) {
|
|
if (has_final_) {
|
|
++range_.first;
|
|
--range_.second;
|
|
}
|
|
}
|
|
|
|
void Set(const Compactor *compactor, StateId s) {
|
|
compactor_ = compactor;
|
|
s_ = s;
|
|
range_ = compactor->CompactsRange(s);
|
|
if (range_.second != 0 &&
|
|
compactor->ComputeArc(s, range_.first, kArcILabelValue).ilabel ==
|
|
kNoLabel) {
|
|
has_final_ = true;
|
|
++range_.first;
|
|
--range_.second;
|
|
} else {
|
|
has_final_ = false;
|
|
}
|
|
}
|
|
|
|
StateId GetStateId() const { return s_; }
|
|
|
|
Weight Final() const {
|
|
if (!has_final_) return Weight::Zero();
|
|
return compactor_->ComputeArc(s_, range_.first - 1, kArcWeightValue).weight;
|
|
}
|
|
|
|
size_t NumArcs() const { return range_.second; }
|
|
|
|
Arc GetArc(size_t i, uint8_t flags) const {
|
|
return compactor_->ComputeArc(s_, range_.first + i, flags);
|
|
}
|
|
|
|
private:
|
|
const Compactor *compactor_ = nullptr; // borrowed ref.
|
|
StateId s_ = kNoStateId;
|
|
std::pair<U, U> range_ = {0, 0};
|
|
bool has_final_ = false;
|
|
};
|
|
|
|
// Specialization for CompactArcStore.
|
|
template <class ArcCompactor, class U>
|
|
class CompactArcState<ArcCompactor, U,
|
|
CompactArcStore<typename ArcCompactor::Element, U>> {
|
|
public:
|
|
using Arc = typename ArcCompactor::Arc;
|
|
using StateId = typename Arc::StateId;
|
|
using Weight = typename Arc::Weight;
|
|
using CompactStore = CompactArcStore<typename ArcCompactor::Element, U>;
|
|
using Compactor = CompactArcCompactor<ArcCompactor, U, CompactStore>;
|
|
|
|
CompactArcState() = default;
|
|
|
|
CompactArcState(const Compactor *compactor, StateId s)
|
|
: arc_compactor_(compactor->GetArcCompactor()), s_(s) {
|
|
Init(compactor);
|
|
}
|
|
|
|
void Set(const Compactor *compactor, StateId s) {
|
|
arc_compactor_ = compactor->GetArcCompactor();
|
|
s_ = s;
|
|
has_final_ = false;
|
|
Init(compactor);
|
|
}
|
|
|
|
StateId GetStateId() const { return s_; }
|
|
|
|
Weight Final() const {
|
|
if (!has_final_) return Weight::Zero();
|
|
return arc_compactor_->Expand(s_, *(compacts_ - 1), kArcWeightValue).weight;
|
|
}
|
|
|
|
size_t NumArcs() const { return num_arcs_; }
|
|
|
|
Arc GetArc(size_t i, uint8_t flags) const {
|
|
return arc_compactor_->Expand(s_, compacts_[i], flags);
|
|
}
|
|
|
|
private:
|
|
void Init(const Compactor *compactor) {
|
|
const auto *store = compactor->GetCompactStore();
|
|
U offset;
|
|
if (!compactor->HasFixedOutdegree()) { // Variable out-degree compactor.
|
|
offset = store->States(s_);
|
|
num_arcs_ = store->States(s_ + 1) - offset;
|
|
} else { // Fixed out-degree compactor.
|
|
offset = s_ * arc_compactor_->Size();
|
|
num_arcs_ = arc_compactor_->Size();
|
|
}
|
|
if (num_arcs_ > 0) {
|
|
compacts_ = &(store->Compacts(offset));
|
|
if (arc_compactor_->Expand(s_, *compacts_, kArcILabelValue).ilabel ==
|
|
kNoStateId) {
|
|
++compacts_;
|
|
--num_arcs_;
|
|
has_final_ = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
private:
|
|
const ArcCompactor *arc_compactor_ = nullptr; // Borrowed reference.
|
|
const typename ArcCompactor::Element *compacts_ =
|
|
nullptr; // Borrowed reference.
|
|
StateId s_ = kNoStateId;
|
|
U num_arcs_ = 0;
|
|
bool has_final_ = false;
|
|
};
|
|
|
|
// Forward declaration only; no definition is visible at this point in the
// file.
template <typename F, typename G>
void Cast(const F &, G *);
|
|
|
|
template <class CompactArcFST, class FST>
|
|
bool WriteCompactArcFst(
|
|
const FST &fst,
|
|
const typename CompactArcFST::Compactor::ArcCompactor &arc_compactor,
|
|
std::ostream &strm, const FstWriteOptions &opts);
|
|
|
|
namespace internal {
|
|
|
|
// Implementation class for CompactFst, which contains parametrizeable
|
|
// Fst data storage (CompactArcStore by default) and Fst cache.
|
|
// C's copy constructor must make a thread-safe copy.
|
|
template <class Arc, class C, class CacheStore = DefaultCacheStore<Arc>>
|
|
class CompactFstImpl
|
|
: public CacheBaseImpl<typename CacheStore::State, CacheStore> {
|
|
public:
|
|
using Weight = typename Arc::Weight;
|
|
using StateId = typename Arc::StateId;
|
|
using Compactor = C;
|
|
|
|
using FstImpl<Arc>::SetType;
|
|
using FstImpl<Arc>::SetProperties;
|
|
using FstImpl<Arc>::Properties;
|
|
using FstImpl<Arc>::SetInputSymbols;
|
|
using FstImpl<Arc>::SetOutputSymbols;
|
|
using FstImpl<Arc>::WriteHeader;
|
|
|
|
using ImplBase = CacheBaseImpl<typename CacheStore::State, CacheStore>;
|
|
using ImplBase::HasArcs;
|
|
using ImplBase::HasFinal;
|
|
using ImplBase::HasStart;
|
|
using ImplBase::PushArc;
|
|
using ImplBase::SetArcs;
|
|
using ImplBase::SetFinal;
|
|
using ImplBase::SetStart;
|
|
|
|
CompactFstImpl()
|
|
: ImplBase(CompactFstOptions()),
|
|
compactor_(std::make_shared<Compactor>()) {
|
|
SetType(Compactor::Type());
|
|
SetProperties(kNullProperties | kStaticProperties);
|
|
}
|
|
|
|
// Constructs a CompactFstImpl, creating a new Compactor using
|
|
// Compactor(fst, compactor); this uses the compactor arg only for optional
|
|
// information, such as compression level. See the Compactor interface
|
|
// description.
|
|
CompactFstImpl(const Fst<Arc> &fst, std::shared_ptr<Compactor> compactor,
|
|
const CompactFstOptions &opts)
|
|
: ImplBase(opts),
|
|
compactor_(std::make_shared<Compactor>(fst, std::move(compactor))) {
|
|
SetType(Compactor::Type());
|
|
SetInputSymbols(fst.InputSymbols());
|
|
SetOutputSymbols(fst.OutputSymbols());
|
|
if (compactor_->Error()) SetProperties(kError, kError);
|
|
uint64_t copy_properties =
|
|
fst.Properties(kMutable, false)
|
|
? fst.Properties(kCopyProperties, true)
|
|
: CheckProperties(
|
|
fst, kCopyProperties & ~kWeightedCycles & ~kUnweightedCycles,
|
|
kCopyProperties);
|
|
if ((copy_properties & kError) || !compactor_->IsCompatible(fst)) {
|
|
FSTERROR() << "CompactFstImpl: Input Fst incompatible with compactor";
|
|
SetProperties(kError, kError);
|
|
return;
|
|
}
|
|
SetProperties(compactor_->Properties(copy_properties) | kStaticProperties);
|
|
}
|
|
|
|
CompactFstImpl(std::shared_ptr<Compactor> compactor,
|
|
const CompactFstOptions &opts)
|
|
: ImplBase(opts), compactor_(std::move(compactor)) {
|
|
SetType(Compactor::Type());
|
|
SetProperties(kStaticProperties | compactor_->Properties(0));
|
|
if (compactor_->Error()) SetProperties(kError, kError);
|
|
}
|
|
|
|
// Makes a thread-safe copy; requires that Compactor's copy constructor
|
|
// does so as well.
|
|
CompactFstImpl(const CompactFstImpl &impl)
|
|
: ImplBase(impl),
|
|
compactor_(impl.compactor_ == nullptr
|
|
? std::make_shared<Compactor>()
|
|
: std::make_shared<Compactor>(*impl.compactor_)) {
|
|
SetType(impl.Type());
|
|
SetProperties(impl.Properties());
|
|
SetInputSymbols(impl.InputSymbols());
|
|
SetOutputSymbols(impl.OutputSymbols());
|
|
}
|
|
|
|
// Allows to change the cache store from OtherCacheStore to CacheStore.
|
|
template <class OtherCacheStore>
|
|
explicit CompactFstImpl(
|
|
const CompactFstImpl<Arc, Compactor, OtherCacheStore> &impl)
|
|
: ImplBase(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
|
|
compactor_(impl.compactor_ == nullptr
|
|
? std::make_shared<Compactor>()
|
|
: std::make_shared<Compactor>(*impl.compactor_)) {
|
|
SetType(impl.Type());
|
|
SetProperties(impl.Properties());
|
|
SetInputSymbols(impl.InputSymbols());
|
|
SetOutputSymbols(impl.OutputSymbols());
|
|
}
|
|
|
|
StateId Start() {
|
|
if (!HasStart()) SetStart(compactor_->Start());
|
|
return ImplBase::Start();
|
|
}
|
|
|
|
Weight Final(StateId s) {
|
|
if (HasFinal(s)) return ImplBase::Final(s);
|
|
compactor_->SetState(s, &state_);
|
|
return state_.Final();
|
|
}
|
|
|
|
StateId NumStates() const {
|
|
if (Properties(kError)) return 0;
|
|
return compactor_->NumStates();
|
|
}
|
|
|
|
size_t NumArcs(StateId s) {
|
|
if (HasArcs(s)) return ImplBase::NumArcs(s);
|
|
compactor_->SetState(s, &state_);
|
|
return state_.NumArcs();
|
|
}
|
|
|
|
size_t NumInputEpsilons(StateId s) {
|
|
if (!HasArcs(s) && !Properties(kILabelSorted)) Expand(s);
|
|
if (HasArcs(s)) return ImplBase::NumInputEpsilons(s);
|
|
return CountEpsilons(s, false);
|
|
}
|
|
|
|
size_t NumOutputEpsilons(StateId s) {
|
|
if (!HasArcs(s) && !Properties(kOLabelSorted)) Expand(s);
|
|
if (HasArcs(s)) return ImplBase::NumOutputEpsilons(s);
|
|
return CountEpsilons(s, true);
|
|
}
|
|
|
|
size_t CountEpsilons(StateId s, bool output_epsilons) {
|
|
compactor_->SetState(s, &state_);
|
|
const uint8_t flags = output_epsilons ? kArcOLabelValue : kArcILabelValue;
|
|
size_t num_eps = 0;
|
|
const size_t num_arcs = state_.NumArcs();
|
|
for (size_t i = 0; i < num_arcs; ++i) {
|
|
const auto &arc = state_.GetArc(i, flags);
|
|
const auto label = output_epsilons ? arc.olabel : arc.ilabel;
|
|
if (label == 0) {
|
|
++num_eps;
|
|
} else if (label > 0) {
|
|
break;
|
|
}
|
|
}
|
|
return num_eps;
|
|
}
|
|
|
|
static CompactFstImpl *Read(std::istream &strm, const FstReadOptions &opts) {
|
|
auto impl = std::make_unique<CompactFstImpl>();
|
|
FstHeader hdr;
|
|
if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
|
|
return nullptr;
|
|
}
|
|
// Ensures compatibility.
|
|
if (hdr.Version() == kAlignedFileVersion) {
|
|
hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
|
|
}
|
|
impl->compactor_ =
|
|
std::shared_ptr<Compactor>(Compactor::Read(strm, opts, hdr));
|
|
if (!impl->compactor_) {
|
|
return nullptr;
|
|
}
|
|
return impl.release();
|
|
}
|
|
|
|
bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
|
|
FstHeader hdr;
|
|
hdr.SetStart(compactor_->Start());
|
|
hdr.SetNumStates(compactor_->NumStates());
|
|
hdr.SetNumArcs(compactor_->NumArcs());
|
|
// Ensures compatibility.
|
|
const auto file_version = opts.align ? kAlignedFileVersion : kFileVersion;
|
|
WriteHeader(strm, opts, file_version, &hdr);
|
|
return compactor_->Write(strm, opts);
|
|
}
|
|
|
|
// Provides information needed for generic state iterator.
|
|
void InitStateIterator(StateIteratorData<Arc> *data) const {
|
|
data->base = nullptr;
|
|
data->nstates = compactor_->NumStates();
|
|
}
|
|
|
|
void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) {
|
|
if (!HasArcs(s)) Expand(s);
|
|
ImplBase::InitArcIterator(s, data);
|
|
}
|
|
|
|
void Expand(StateId s) {
|
|
compactor_->SetState(s, &state_);
|
|
const size_t num_arcs = state_.NumArcs();
|
|
for (size_t i = 0; i < num_arcs; ++i)
|
|
PushArc(s, state_.GetArc(i, kArcValueFlags));
|
|
SetArcs(s);
|
|
if (!HasFinal(s)) SetFinal(s, state_.Final());
|
|
}
|
|
|
|
const Compactor *GetCompactor() const { return compactor_.get(); }
|
|
Compactor *MutableCompactor() { return compactor_.get(); }
|
|
std::shared_ptr<Compactor> SharedCompactor() { return compactor_; }
|
|
void SetCompactor(std::shared_ptr<Compactor> compactor) {
|
|
// TODO(allauzen): is this correct? is this needed?
|
|
// TODO(allauzen): consider removing and forcing this through direct calls
|
|
// to compactor.
|
|
compactor_ = std::move(compactor);
|
|
}
|
|
|
|
// Properties always true of this FST class.
|
|
static constexpr uint64_t kStaticProperties = kExpanded;
|
|
|
|
protected:
|
|
template <class OtherArc, class OtherCompactor, class OtherCacheStore>
|
|
explicit CompactFstImpl(
|
|
const CompactFstImpl<OtherArc, OtherCompactor, OtherCacheStore> &impl)
|
|
: compactor_(std::make_shared<Compactor>(*impl.GetCompactor())) {
|
|
SetType(impl.Type());
|
|
SetProperties(impl.Properties());
|
|
SetInputSymbols(impl.InputSymbols());
|
|
SetOutputSymbols(impl.OutputSymbols());
|
|
}
|
|
|
|
private:
|
|
// For k*Version constants.
|
|
template <class CompactArcFST, class FST>
|
|
friend bool ::fst::WriteCompactArcFst(
|
|
const FST &fst,
|
|
const typename CompactArcFST::Compactor::ArcCompactor &arc_compactor,
|
|
std::ostream &strm, const FstWriteOptions &opts);
|
|
|
|
// Current unaligned file format version.
|
|
static constexpr int kFileVersion = 2;
|
|
// Current aligned file format version.
|
|
static constexpr int kAlignedFileVersion = 1;
|
|
// Minimum file format version supported.
|
|
static constexpr int kMinFileVersion = 1;
|
|
|
|
std::shared_ptr<Compactor> compactor_;
|
|
typename Compactor::State state_;
|
|
};
|
|
|
|
// Returns the compactor for the CompactFst; intended to be called as
|
|
// GetCompactor<CompactorType>(fst), which returns the compactor only if it
|
|
// is of the specified type and otherwise nullptr (via the overload below).
|
|
template <class Compactor, class Arc>
|
|
const Compactor *GetCompactor(const CompactFst<Arc, Compactor> &fst) {
|
|
return fst.GetCompactor();
|
|
}
|
|
|
|
template <class Compactor, class Arc>
|
|
const Compactor *GetCompactor(const Fst<Arc> &fst) {
|
|
return nullptr;
|
|
}
|
|
|
|
} // namespace internal
|
|
|
|
// This class attaches interface to implementation and handles reference
|
|
// counting, delegating most methods to ImplToExpandedFst.
|
|
// (Template argument defaults are declared in fst-decl.h.)
|
|
template <class A, class C, class CacheStore>
|
|
class CompactFst
|
|
: public ImplToExpandedFst<internal::CompactFstImpl<A, C, CacheStore>> {
|
|
public:
|
|
template <class F, class G>
|
|
void friend Cast(const F &, G *);
|
|
|
|
using Arc = A;
|
|
using StateId = typename Arc::StateId;
|
|
using Compactor = C;
|
|
using Impl = internal::CompactFstImpl<Arc, Compactor, CacheStore>;
|
|
using Store = CacheStore; // for CacheArcIterator
|
|
|
|
friend class StateIterator<CompactFst>;
|
|
friend class ArcIterator<CompactFst>;
|
|
|
|
CompactFst() : ImplToExpandedFst<Impl>(std::make_shared<Impl>()) {}
|
|
|
|
explicit CompactFst(const Fst<Arc> &fst,
|
|
const CompactFstOptions &opts = CompactFstOptions())
|
|
: CompactFst(fst, std::make_shared<Compactor>(fst), opts) {}
|
|
|
|
// Constructs a CompactFst, creating a new Compactor using
|
|
// Compactor(fst, compactor); this uses the compactor arg only for optional
|
|
// information, such as compression level. See the Compactor interface
|
|
// description.
|
|
CompactFst(const Fst<Arc> &fst, std::shared_ptr<Compactor> compactor,
|
|
const CompactFstOptions &opts = CompactFstOptions())
|
|
: ImplToExpandedFst<Impl>(
|
|
std::make_shared<Impl>(fst, std::move(compactor), opts)) {}
|
|
|
|
// Convenience constructor taking a Compactor rvalue ref. Avoids
|
|
// clutter of make_shared<Compactor> at call site.
|
|
// Constructs a CompactFst, creating a new Compactor using
|
|
// Compactor(fst, compactor); this uses the compactor arg only for optional
|
|
// information, such as compression level. See the Compactor interface
|
|
// description.
|
|
CompactFst(const Fst<Arc> &fst, Compactor &&compactor,
|
|
const CompactFstOptions &opts = CompactFstOptions())
|
|
: CompactFst(fst, std::make_shared<Compactor>(std::move(compactor)),
|
|
opts) {}
|
|
|
|
explicit CompactFst(std::shared_ptr<Compactor> compactor,
|
|
const CompactFstOptions &opts = CompactFstOptions())
|
|
: ImplToExpandedFst<Impl>(
|
|
std::make_shared<Impl>(std::move(compactor), opts)) {}
|
|
|
|
// See Fst<>::Copy() for doc.
|
|
CompactFst(const CompactFst &fst, bool safe = false)
|
|
: ImplToExpandedFst<Impl>(fst, safe) {}
|
|
|
|
// Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
|
|
CompactFst *Copy(bool safe = false) const override {
|
|
return new CompactFst(*this, safe);
|
|
}
|
|
|
|
// Read a CompactFst from an input stream; return nullptr on error
|
|
static CompactFst *Read(std::istream &strm, const FstReadOptions &opts) {
|
|
auto *impl = Impl::Read(strm, opts);
|
|
return impl ? new CompactFst(std::shared_ptr<Impl>(impl)) : nullptr;
|
|
}
|
|
|
|
// Read a CompactFst from a file; return nullptr on error
|
|
// Empty source reads from standard input
|
|
static CompactFst *Read(std::string_view source) {
|
|
auto *impl = ImplToExpandedFst<Impl>::Read(source);
|
|
return impl ? new CompactFst(std::shared_ptr<Impl>(impl)) : nullptr;
|
|
}
|
|
|
|
bool Write(std::ostream &strm, const FstWriteOptions &opts) const override {
|
|
return GetImpl()->Write(strm, opts);
|
|
}
|
|
|
|
bool Write(const std::string &source) const override {
|
|
return Fst<Arc>::WriteFile(source);
|
|
}
|
|
|
|
void InitStateIterator(StateIteratorData<Arc> *data) const override {
|
|
GetImpl()->InitStateIterator(data);
|
|
}
|
|
|
|
void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
|
|
GetMutableImpl()->InitArcIterator(s, data);
|
|
}
|
|
|
|
MatcherBase<Arc> *InitMatcher(MatchType match_type) const override {
|
|
return new SortedMatcher<CompactFst>(*this, match_type);
|
|
}
|
|
|
|
const Compactor *GetCompactor() const { return GetImpl()->GetCompactor(); }
|
|
|
|
void SetCompactor(std::shared_ptr<Compactor> compactor) {
|
|
GetMutableImpl()->SetCompactor(std::move(compactor));
|
|
}
|
|
|
|
private:
|
|
using ImplToFst<Impl, ExpandedFst<Arc>>::GetImpl;
|
|
using ImplToFst<Impl, ExpandedFst<Arc>>::GetMutableImpl;
|
|
|
|
explicit CompactFst(std::shared_ptr<Impl> impl)
|
|
: ImplToExpandedFst<Impl>(std::move(impl)) {}
|
|
|
|
CompactFst &operator=(const CompactFst &fst) = delete;
|
|
};
|
|
|
|
// Writes FST in ArcCompacted format, with a possible pass over the machine
|
|
// before writing to compute the number of states and arcs.
|
|
template <class CompactArcFST, class FST>
|
|
bool WriteCompactArcFst(
|
|
const FST &fst,
|
|
const typename CompactArcFST::Compactor::ArcCompactor &arc_compactor,
|
|
std::ostream &strm, const FstWriteOptions &opts) {
|
|
using Arc = typename CompactArcFST::Arc;
|
|
using Compactor = typename CompactArcFST::Compactor;
|
|
using ArcCompactor = typename Compactor::ArcCompactor;
|
|
using CompactStore = typename Compactor::CompactStore;
|
|
using Element = typename ArcCompactor::Element;
|
|
using Impl = typename CompactArcFST::Impl;
|
|
using Unsigned = typename Compactor::Unsigned;
|
|
using Weight = typename Arc::Weight;
|
|
const auto file_version =
|
|
opts.align ? Impl::kAlignedFileVersion : Impl::kFileVersion;
|
|
size_t num_arcs = -1;
|
|
size_t num_states = -1;
|
|
auto first_pass_arc_compactor = arc_compactor;
|
|
// Note that GetCompactor will only return non-null if the compactor has the
|
|
// exact type Compactor == CompactArcFst::Compactor. This is what we want;
|
|
// other types must do an extra pass to set the arc compactor state.
|
|
if (const Compactor *const compactor =
|
|
internal::GetCompactor<Compactor>(fst)) {
|
|
num_arcs = compactor->NumArcs();
|
|
num_states = compactor->NumStates();
|
|
first_pass_arc_compactor = *compactor->GetArcCompactor();
|
|
} else {
|
|
// A first pass is needed to compute the state of the compactor, which
|
|
// is saved ahead of the rest of the data structures. This unfortunately
|
|
// means forcing a complete double compaction when writing in this format.
|
|
// TODO(allauzen): eliminate mutable state from compactors.
|
|
num_arcs = 0;
|
|
num_states = 0;
|
|
for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
|
|
const auto s = siter.Value();
|
|
++num_states;
|
|
if (fst.Final(s) != Weight::Zero()) {
|
|
first_pass_arc_compactor.Compact(
|
|
s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
|
|
}
|
|
for (ArcIterator<FST> aiter(fst, s); !aiter.Done(); aiter.Next()) {
|
|
++num_arcs;
|
|
first_pass_arc_compactor.Compact(s, aiter.Value());
|
|
}
|
|
}
|
|
}
|
|
FstHeader hdr;
|
|
hdr.SetStart(fst.Start());
|
|
hdr.SetNumStates(num_states);
|
|
hdr.SetNumArcs(num_arcs);
|
|
std::string type = "compact";
|
|
if (sizeof(Unsigned) != sizeof(uint32_t)) {
|
|
type += std::to_string(CHAR_BIT * sizeof(Unsigned));
|
|
}
|
|
type += "_";
|
|
type += ArcCompactor::Type();
|
|
if (CompactStore::Type() != "compact") {
|
|
type += "_";
|
|
type += CompactStore::Type();
|
|
}
|
|
const auto copy_properties = fst.Properties(kCopyProperties, true);
|
|
if ((copy_properties & kError) || !arc_compactor.Compatible(fst)) {
|
|
FSTERROR() << "Fst incompatible with compactor";
|
|
return false;
|
|
}
|
|
uint64_t properties = copy_properties | Impl::kStaticProperties;
|
|
internal::FstImpl<Arc>::WriteFstHeader(fst, strm, opts, file_version, type,
|
|
properties, &hdr);
|
|
first_pass_arc_compactor.Write(strm);
|
|
if (first_pass_arc_compactor.Size() == -1) {
|
|
if (opts.align && !AlignOutput(strm)) {
|
|
LOG(ERROR) << "WriteCompactArcFst: Alignment failed: " << opts.source;
|
|
return false;
|
|
}
|
|
Unsigned compacts = 0;
|
|
for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
|
|
const auto s = siter.Value();
|
|
strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
|
|
if (fst.Final(s) != Weight::Zero()) {
|
|
++compacts;
|
|
}
|
|
compacts += fst.NumArcs(s);
|
|
}
|
|
strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
|
|
}
|
|
if (opts.align && !AlignOutput(strm)) {
|
|
LOG(ERROR) << "Could not align file during write after writing states";
|
|
}
|
|
const auto &second_pass_arc_compactor = arc_compactor;
|
|
Element element;
|
|
for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
|
|
const auto s = siter.Value();
|
|
if (fst.Final(s) != Weight::Zero()) {
|
|
element = second_pass_arc_compactor.Compact(
|
|
s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
|
|
strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
|
|
}
|
|
for (ArcIterator<FST> aiter(fst, s); !aiter.Done(); aiter.Next()) {
|
|
element = second_pass_arc_compactor.Compact(s, aiter.Value());
|
|
strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
|
|
}
|
|
}
|
|
strm.flush();
|
|
if (!strm) {
|
|
LOG(ERROR) << "WriteCompactArcFst: Write failed: " << opts.source;
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Specialization for CompactFst; see generic version in fst.h for sample
|
|
// usage (but use the CompactFst type!). This version should inline.
|
|
template <class Arc, class Compactor, class CacheStore>
|
|
class StateIterator<CompactFst<Arc, Compactor, CacheStore>> {
|
|
public:
|
|
using StateId = typename Arc::StateId;
|
|
|
|
explicit StateIterator(const CompactFst<Arc, Compactor, CacheStore> &fst)
|
|
: nstates_(fst.NumStates()), s_(0) {}
|
|
|
|
bool Done() const { return s_ >= nstates_; }
|
|
|
|
StateId Value() const { return s_; }
|
|
|
|
void Next() { ++s_; }
|
|
|
|
void Reset() { s_ = 0; }
|
|
|
|
private:
|
|
StateId nstates_;
|
|
StateId s_;
|
|
};
|
|
|
|
// Specialization for CompactFst. Never caches,
|
|
// always iterates over the underlying compact elements.
|
|
template <class Arc, class Compactor, class CacheStore>
|
|
class ArcIterator<CompactFst<Arc, Compactor, CacheStore>> {
|
|
public:
|
|
using StateId = typename Arc::StateId;
|
|
using State = typename Compactor::State;
|
|
|
|
ArcIterator(const CompactFst<Arc, Compactor, CacheStore> &fst, StateId s)
|
|
: state_(fst.GetMutableImpl()->MutableCompactor(), s),
|
|
pos_(0),
|
|
num_arcs_(state_.NumArcs()),
|
|
flags_(kArcValueFlags) {}
|
|
|
|
bool Done() const { return pos_ >= num_arcs_; }
|
|
|
|
const Arc &Value() const {
|
|
arc_ = state_.GetArc(pos_, flags_);
|
|
return arc_;
|
|
}
|
|
|
|
void Next() { ++pos_; }
|
|
|
|
size_t Position() const { return pos_; }
|
|
|
|
void Reset() { pos_ = 0; }
|
|
|
|
void Seek(size_t pos) { pos_ = pos; }
|
|
|
|
uint8_t Flags() const { return flags_; }
|
|
|
|
void SetFlags(uint8_t flags, uint8_t mask) {
|
|
flags_ &= ~mask;
|
|
flags_ |= (flags & kArcValueFlags);
|
|
}
|
|
|
|
private:
|
|
State state_;
|
|
size_t pos_;
|
|
// Cache the value of NumArcs(), since it is used in Done() and may be slow.
|
|
size_t num_arcs_;
|
|
mutable Arc arc_;
|
|
uint8_t flags_;
|
|
};
|
|
|
|
// ArcCompactor for unweighted string FSTs.
|
|
template <class A>
|
|
class StringCompactor {
|
|
public:
|
|
using Arc = A;
|
|
using Label = typename Arc::Label;
|
|
using StateId = typename Arc::StateId;
|
|
using Weight = typename Arc::Weight;
|
|
|
|
using Element = Label;
|
|
|
|
Element Compact(StateId s, const Arc &arc) const { return arc.ilabel; }
|
|
|
|
Arc Expand(StateId s, const Element &p,
|
|
uint8_t flags = kArcValueFlags) const {
|
|
return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId);
|
|
}
|
|
|
|
constexpr ssize_t Size() const { return 1; }
|
|
|
|
constexpr uint64_t Properties() const { return kCompiledStringProperties; }
|
|
|
|
bool Compatible(const Fst<Arc> &fst) const {
|
|
const auto props = Properties();
|
|
return fst.Properties(props, true) == props;
|
|
}
|
|
|
|
static const std::string &Type() {
|
|
static const std::string *const type = new std::string("string");
|
|
return *type;
|
|
}
|
|
|
|
bool Write(std::ostream &strm) const { return true; }
|
|
|
|
static StringCompactor *Read(std::istream &strm) {
|
|
return new StringCompactor;
|
|
}
|
|
};
|
|
|
|
// ArcCompactor for weighted string FSTs.
|
|
template <class A>
|
|
class WeightedStringCompactor {
|
|
public:
|
|
using Arc = A;
|
|
using Label = typename Arc::Label;
|
|
using StateId = typename Arc::StateId;
|
|
using Weight = typename Arc::Weight;
|
|
|
|
using Element = std::pair<Label, Weight>;
|
|
|
|
Element Compact(StateId s, const Arc &arc) const {
|
|
return std::make_pair(arc.ilabel, arc.weight);
|
|
}
|
|
|
|
Arc Expand(StateId s, const Element &p,
|
|
uint8_t flags = kArcValueFlags) const {
|
|
return Arc(p.first, p.first, p.second,
|
|
p.first != kNoLabel ? s + 1 : kNoStateId);
|
|
}
|
|
|
|
constexpr ssize_t Size() const { return 1; }
|
|
|
|
constexpr uint64_t Properties() const { return kString | kAcceptor; }
|
|
|
|
bool Compatible(const Fst<Arc> &fst) const {
|
|
const auto props = Properties();
|
|
return fst.Properties(props, true) == props;
|
|
}
|
|
|
|
static const std::string &Type() {
|
|
static const std::string *const type = new std::string("weighted_string");
|
|
return *type;
|
|
}
|
|
|
|
bool Write(std::ostream &strm) const { return true; }
|
|
|
|
static WeightedStringCompactor *Read(std::istream &strm) {
|
|
return new WeightedStringCompactor;
|
|
}
|
|
};
|
|
|
|
// ArcCompactor for unweighted acceptor FSTs.
|
|
template <class A>
|
|
class UnweightedAcceptorCompactor {
|
|
public:
|
|
using Arc = A;
|
|
using Label = typename Arc::Label;
|
|
using StateId = typename Arc::StateId;
|
|
using Weight = typename Arc::Weight;
|
|
|
|
using Element = std::pair<Label, StateId>;
|
|
|
|
Element Compact(StateId s, const Arc &arc) const {
|
|
return std::make_pair(arc.ilabel, arc.nextstate);
|
|
}
|
|
|
|
Arc Expand(StateId s, const Element &p,
|
|
uint8_t flags = kArcValueFlags) const {
|
|
return Arc(p.first, p.first, Weight::One(), p.second);
|
|
}
|
|
|
|
constexpr ssize_t Size() const { return -1; }
|
|
|
|
constexpr uint64_t Properties() const { return kAcceptor | kUnweighted; }
|
|
|
|
bool Compatible(const Fst<Arc> &fst) const {
|
|
const auto props = Properties();
|
|
return fst.Properties(props, true) == props;
|
|
}
|
|
|
|
static const std::string &Type() {
|
|
static const std::string *const type =
|
|
new std::string("unweighted_acceptor");
|
|
return *type;
|
|
}
|
|
|
|
bool Write(std::ostream &strm) const { return true; }
|
|
|
|
static UnweightedAcceptorCompactor *Read(std::istream &istrm) {
|
|
return new UnweightedAcceptorCompactor;
|
|
}
|
|
};
|
|
|
|
// ArcCompactor for weighted acceptor FSTs.
|
|
template <class A>
|
|
class AcceptorCompactor {
|
|
public:
|
|
using Arc = A;
|
|
using Label = typename Arc::Label;
|
|
using StateId = typename Arc::StateId;
|
|
using Weight = typename Arc::Weight;
|
|
|
|
using Element = std::pair<std::pair<Label, Weight>, StateId>;
|
|
|
|
Element Compact(StateId s, const Arc &arc) const {
|
|
return std::make_pair(std::make_pair(arc.ilabel, arc.weight),
|
|
arc.nextstate);
|
|
}
|
|
|
|
Arc Expand(StateId s, const Element &p,
|
|
uint8_t flags = kArcValueFlags) const {
|
|
return Arc(p.first.first, p.first.first, p.first.second, p.second);
|
|
}
|
|
|
|
constexpr ssize_t Size() const { return -1; }
|
|
|
|
constexpr uint64_t Properties() const { return kAcceptor; }
|
|
|
|
bool Compatible(const Fst<Arc> &fst) const {
|
|
const auto props = Properties();
|
|
return fst.Properties(props, true) == props;
|
|
}
|
|
|
|
static const std::string &Type() {
|
|
static const std::string *const type = new std::string("acceptor");
|
|
return *type;
|
|
}
|
|
|
|
bool Write(std::ostream &strm) const { return true; }
|
|
|
|
static AcceptorCompactor *Read(std::istream &strm) {
|
|
return new AcceptorCompactor;
|
|
}
|
|
};
|
|
|
|
// ArcCompactor for unweighted FSTs.
|
|
template <class A>
|
|
class UnweightedCompactor {
|
|
public:
|
|
using Arc = A;
|
|
using Label = typename Arc::Label;
|
|
using StateId = typename Arc::StateId;
|
|
using Weight = typename Arc::Weight;
|
|
|
|
using Element = std::pair<std::pair<Label, Label>, StateId>;
|
|
|
|
Element Compact(StateId s, const Arc &arc) const {
|
|
return std::make_pair(std::make_pair(arc.ilabel, arc.olabel),
|
|
arc.nextstate);
|
|
}
|
|
|
|
Arc Expand(StateId s, const Element &p,
|
|
uint8_t flags = kArcValueFlags) const {
|
|
return Arc(p.first.first, p.first.second, Weight::One(), p.second);
|
|
}
|
|
|
|
constexpr ssize_t Size() const { return -1; }
|
|
|
|
constexpr uint64_t Properties() const { return kUnweighted; }
|
|
|
|
bool Compatible(const Fst<Arc> &fst) const {
|
|
const auto props = Properties();
|
|
return fst.Properties(props, true) == props;
|
|
}
|
|
|
|
static const std::string &Type() {
|
|
static const std::string *const type = new std::string("unweighted");
|
|
return *type;
|
|
}
|
|
|
|
bool Write(std::ostream &strm) const { return true; }
|
|
|
|
static UnweightedCompactor *Read(std::istream &strm) {
|
|
return new UnweightedCompactor;
|
|
}
|
|
};
|
|
|
|
template <class Arc, class Unsigned /* = uint32_t */>
|
|
using CompactStringFst = CompactArcFst<Arc, StringCompactor<Arc>, Unsigned>;
|
|
|
|
template <class Arc, class Unsigned /* = uint32_t */>
|
|
using CompactWeightedStringFst =
|
|
CompactArcFst<Arc, WeightedStringCompactor<Arc>, Unsigned>;
|
|
|
|
template <class Arc, class Unsigned /* = uint32_t */>
|
|
using CompactAcceptorFst = CompactArcFst<Arc, AcceptorCompactor<Arc>, Unsigned>;
|
|
|
|
template <class Arc, class Unsigned /* = uint32_t */>
|
|
using CompactUnweightedFst =
|
|
CompactArcFst<Arc, UnweightedCompactor<Arc>, Unsigned>;
|
|
|
|
template <class Arc, class Unsigned /* = uint32_t */>
|
|
using CompactUnweightedAcceptorFst =
|
|
CompactArcFst<Arc, UnweightedAcceptorCompactor<Arc>, Unsigned>;
|
|
|
|
using StdCompactStringFst = CompactStringFst<StdArc, uint32_t>;
|
|
|
|
using StdCompactWeightedStringFst = CompactWeightedStringFst<StdArc, uint32_t>;
|
|
|
|
using StdCompactAcceptorFst = CompactAcceptorFst<StdArc, uint32_t>;
|
|
|
|
using StdCompactUnweightedFst = CompactUnweightedFst<StdArc, uint32_t>;
|
|
|
|
using StdCompactUnweightedAcceptorFst =
|
|
CompactUnweightedAcceptorFst<StdArc, uint32_t>;
|
|
|
|
// Convenience function to make a CompactStringFst from a sequence
|
|
// of Arc::Labels. LabelIterator must be an input iterator.
|
|
template <class Arc, class Unsigned = uint32_t, class LabelIterator>
|
|
inline CompactStringFst<Arc, Unsigned> MakeCompactStringFst(
|
|
const LabelIterator begin, const LabelIterator end) {
|
|
using CompactStringFst = CompactStringFst<Arc, Unsigned>;
|
|
using Compactor = typename CompactStringFst::Compactor;
|
|
return CompactStringFst(std::make_shared<Compactor>(begin, end));
|
|
}
|
|
|
|
template <class LabelIterator>
|
|
inline StdCompactStringFst MakeStdCompactStringFst(const LabelIterator begin,
|
|
const LabelIterator end) {
|
|
return MakeCompactStringFst<StdArc>(begin, end);
|
|
}
|
|
|
|
} // namespace fst
|
|
|
|
#endif // FST_COMPACT_FST_H_
|