|
|
// Copyright 2005-2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the 'License');
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an 'AS IS' BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// See www.openfst.org for extensive documentation on this weighted
// finite-state transducer library.
//
// General weight set and associated semiring operation definitions.
#ifndef FST_WEIGHT_H_
#define FST_WEIGHT_H_
#include <cctype>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <ios>
#include <iostream>
#include <istream>
#include <ostream>
#include <sstream>
#include <string>
#include <type_traits>
#include <utility>
#include <fst/compat.h>
#include <fst/log.h>
#include <fst/util.h>
// Flags consulted for textual composite-weight I/O when no explicit separator
// or parentheses are passed (see CompositeWeightWriter/CompositeWeightReader).
DECLARE_string(fst_weight_parentheses); DECLARE_string(fst_weight_separator);
namespace fst {
// A semiring is specified by two binary operations Plus and Times and two
// designated elements Zero and One with the following properties:
//
// Plus: associative, commutative, and has Zero as its identity.
//
// Times: associative and has identity One, distributes w.r.t. Plus, and
// has Zero as an annihilator:
// Times(Zero(), a) == Times(a, Zero()) = Zero().
//
// A left semiring distributes on the left; a right semiring is similarly
// defined.
//
// A Weight class must have binary functions Plus and Times and static member
// functions Zero() and One() and these must form (at least) a left or right
// semiring.
//
// In addition, the following should be defined for a Weight:
//
// Member: predicate on set membership.
//
// NoWeight: static member function that returns an element that is
// not a set member; used to signal an error.
//
// >>: reads textual representation of a weight.
//
// <<: prints textual representation of a weight.
//
// Read(istream &istrm): reads binary representation of a weight.
//
// Write(ostream &ostrm): writes binary representation of a weight.
//
// Hash: maps weight to size_t.
//
// ApproxEqual: approximate equality (for inexact weights)
//
// Quantize: quantizes w.r.t delta (for inexact weights)
//
// Divide:
// - In a left semiring, for all a, b, b', c:
// if Times(a, b) = c, Divide(c, a, DIVIDE_LEFT) = b' and b'.Member(),
// then Times(a, b') = c.
// - In a right semiring, for all a, a', b, c:
// if Times(a, b) = c, Divide(c, b, DIVIDE_RIGHT) = a' and a'.Member(),
// then Times(a', b) = c.
// - In a commutative semiring,
// * for all a, c:
// Divide(c, a, DIVIDE_ANY) = Divide(c, a, DIVIDE_LEFT)
// = Divide(c, a, DIVIDE_RIGHT)
// * for all a, b, b', c:
// if Times(a, b) = c, Divide(c, a, DIVIDE_ANY) = b' and b'.Member(),
// then Times(a, b') = c
// - In the case where there exist no b such that c = Times(a, b), the
// return value of Divide(c, a, DIVIDE_LEFT) is unspecified. Returning
// Weight::NoWeight() is recommended but not required in order to
// allow the most efficient implementation.
// - All algorithms in this library only call Divide(c, a) when it is
// guaranteed that there exists a b such that c = Times(a, b).
//
// ReverseWeight: the type of the corresponding reverse weight.
//
// Typically the same type as Weight for a (both left and right) semiring.
// For the left string semiring, it is the right string semiring.
//
// Reverse: a mapping from Weight to ReverseWeight s.t.
//
// --> Reverse(Reverse(a)) = a
// --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b))
// --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a))
// Typically the identity mapping in a (both left and right) semiring.
// In the left string semiring, it maps to the reverse string in the right
// string semiring.
//
// Properties: specifies additional properties that hold:
// LeftSemiring: indicates weights form a left semiring.
// RightSemiring: indicates weights form a right semiring.
// Commutative: for all a, b: Times(a,b) == Times(b, a)
// Idempotent: for all a: Plus(a, a) == a.
// Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
//
// User-defined weights and their corresponding operations SHOULD be
// defined in the same namespace, but SHOULD NOT be defined in the fst
// namespace. Defining them in fst would make the user code fragile
// to additions in fst. They will be found in another namespace
// via argument-dependent lookup.
// CONSTANT DEFINITIONS

// A representable float near .001; exactly 2^-10 (1/1024).
inline constexpr float kDelta = 0.0009765625F;

// Weight property bits. Each property occupies one bit of a uint64_t mask.

// Bit 0. For all a, b, c:
// Times(c, Plus(a, b)) = Plus(Times(c, a), Times(c, b)).
inline constexpr uint64_t kLeftSemiring = uint64_t{1} << 0;

// Bit 1. For all a, b, c:
// Times(Plus(a, b), c) = Plus(Times(a, c), Times(b, c)).
inline constexpr uint64_t kRightSemiring = uint64_t{1} << 1;

// Both the left and the right semiring bits.
inline constexpr uint64_t kSemiring = kLeftSemiring | kRightSemiring;

// Bit 2. For all a, b: Times(a, b) = Times(b, a).
inline constexpr uint64_t kCommutative = uint64_t{1} << 2;

// Bit 3. For all a: Plus(a, a) = a.
inline constexpr uint64_t kIdempotent = uint64_t{1} << 3;

// Bit 4. For all a, b: Plus(a, b) = a or Plus(a, b) = b.
inline constexpr uint64_t kPath = uint64_t{1} << 4;

// For random weight generation: default number of distinct weights.
// This is also used for a few other weight generation defaults.
inline constexpr size_t kNumRandomWeights = 5;
// Weight property boolean constants needed for SFINAE.

// std::true_type iff W::Properties() has the kIdempotent bit set.
template <class W>
using IsIdempotent =
    std::integral_constant<bool, (W::Properties() & kIdempotent) != 0>;

// std::true_type iff W::Properties() has the kPath bit set.
template <class W>
using IsPath = std::integral_constant<bool, (W::Properties() & kPath) != 0>;
// Determines direction of division.
enum DivideType {
  DIVIDE_LEFT = 0,   // left division
  DIVIDE_RIGHT = 1,  // right division
  DIVIDE_ANY = 2     // division in a commutative semiring
};
// NATURAL ORDER
//
// By definition:
//
// a <= b iff a + b = a
//
// The natural order is a negative partial order iff the semiring is
// idempotent. It is trivially monotonic for plus. It is left
// (resp. right) monotonic for times iff the semiring is left
// (resp. right) distributive. It is a total order iff the semiring
// has the path property.
//
// For more information, see:
//
// Mohri, M. 2002. Semiring framework and algorithms for shortest-distance
// problems, Journal of Automata, Languages and
// Combinatorics 7(3): 321-350, 2002.
//
// We define the strict version of this order below: w1 is less than w2 iff
// the two weights differ and Plus(w1, w2) == w1.
// Requires W is idempotent.
template <class W>
struct NaturalLess {
  using Weight = W;

  static_assert(IsIdempotent<W>::value, "W must be idempotent.");

  bool operator()(const Weight &w1, const Weight &w2) const {
    // Equal weights are never strictly ordered; Plus is not evaluated then.
    if (!(w1 != w2)) return false;
    return Plus(w1, w2) == w1;
  }
};
// Power is the iterated product for arbitrary semirings such that Power(w, 0)
// is One() for the semiring, and Power(w, n) = Times(Power(w, n - 1), w).
// Implemented as n successive right-multiplications by weight.
template <class Weight>
Weight Power(const Weight &weight, size_t n) {
  auto product = Weight::One();
  // n is a by-value copy, so it doubles as the countdown.
  while (n-- > 0) {
    product = Times(product, weight);
  }
  return product;
}
// Simple default adder class. Specializations might be more complex.
// Keeps a running sum that is combined with new weights through Plus.
template <class Weight>
class Adder {
 public:
  // Starts the running sum at Weight::Zero().
  Adder() : sum_(Weight::Zero()) {}

  // Starts the running sum at w.
  explicit Adder(Weight w) : sum_(std::move(w)) {}

  // Folds w into the running sum and returns the updated sum.
  Weight Add(const Weight &w) {
    sum_ = Plus(sum_, w);
    return sum_;
  }

  // Returns a copy of the current running sum.
  Weight Sum() const { return sum_; }

  // Replaces the running sum with w (Weight::Zero() by default).
  void Reset(Weight w = Weight::Zero()) { sum_ = std::move(w); }

 private:
  Weight sum_;  // Running sum.
};
// General weight converter: raises error.
// Used when W1 and W2 differ and no other specialization applies; logs the
// two weight type names and returns W2::NoWeight().
template <class W1, class W2>
struct WeightConvert {
  W2 operator()([[maybe_unused]] W1 w1) const {
    FSTERROR() << "WeightConvert: Can't convert weight from " << W1::Type()
               << " to " << W2::Type();
    return W2::NoWeight();
  }
};

// Specialized weight converter to self: returns its argument unchanged.
template <class W>
struct WeightConvert<W, W> {
  constexpr W operator()(W weight) const { return weight; }
};
// General random weight generator: raises error.
//
// The standard interface is roughly:
//
//   class WeightGenerate<MyWeight> {
//    public:
//     explicit WeightGenerate(uint64_t seed = std::random_device()(),
//                             bool allow_zero = true,
//                             ...);
//
//     MyWeight operator()() const;
//   };
//
// Many weight generators also take trailing constructor arguments specifying
// the number of random (unique) weights, the length of weights (e.g., for
// string-based weights), etc. with sensible defaults.
//
// This unspecialized version logs the weight type name and returns
// W::NoWeight().
template <class W>
struct WeightGenerate {
  W operator()() const {
    FSTERROR() << "WeightGenerate: No random generator for " << W::Type();
    return W::NoWeight();
  }
};
namespace internal {
// Holds the separator and parenthesis characters, plus an error flag, for the
// composite weight reader and writer classes that derive from it.
class CompositeWeightIO {
 public:
  // Both constructors are defined outside this header.
  CompositeWeightIO();
  CompositeWeightIO(char separator, std::pair<char, char> parentheses);

  // Returns the {open, close} parenthesis characters.
  std::pair<char, char> parentheses() const {
    return std::make_pair(open_paren_, close_paren_);
  }

  // Returns the element separator character.
  char separator() const { return separator_; }

  // Returns the stored error flag.
  bool error() const { return error_; }

 protected:
  const char separator_;
  const char open_paren_;
  const char close_paren_;

 private:
  bool error_;
};
} // namespace internal
// Helper class for writing textual composite weights.
class CompositeWeightWriter : public internal::CompositeWeightIO {
 public:
  // Uses configuration from flags (FST_FLAGS_fst_weight_separator,
  // FST_FLAGS_fst_weight_parentheses).
  explicit CompositeWeightWriter(std::ostream &ostrm);

  // parentheses defines the opening and closing parenthesis characters.
  // Set parentheses = {0, 0} to disable writing parenthesis.
  CompositeWeightWriter(std::ostream &ostrm, char separator,
                        std::pair<char, char> parentheses);

  // Not copyable.
  CompositeWeightWriter(const CompositeWeightWriter &) = delete;
  CompositeWeightWriter &operator=(const CompositeWeightWriter &) = delete;

  // Writes open parenthesis to a stream if option selected.
  void WriteBegin();

  // Writes element to a stream. Every element after the first is preceded by
  // the separator character.
  template <class T>
  void WriteElement(const T &comp) {
    const bool needs_separator = i_ > 0;
    ++i_;
    if (needs_separator) ostrm_ << separator_;
    ostrm_ << comp;
  }

  // Writes close parenthesis to a stream if option selected.
  void WriteEnd();

 private:
  std::ostream &ostrm_;  // Output stream.
  int i_ = 0;            // Element position.
};
// Helper class for reading textual composite weights. Elements are separated by
// a separator character. There must be at least one element per textual
// representation. Parentheses characters should be set if the composite
// weights themselves contain composite weights to ensure proper parsing.
class CompositeWeightReader : public internal::CompositeWeightIO {
 public:
  // Uses configuration from flags (FST_FLAGS_fst_weight_separator,
  // FST_FLAGS_fst_weight_parentheses).
  explicit CompositeWeightReader(std::istream &istrm);

  // parentheses defines the opening and closing parenthesis characters.
  // Set parentheses = {0, 0} to disable reading parenthesis.
  CompositeWeightReader(std::istream &istrm, char separator,
                        std::pair<char, char> parentheses);

  // Not copyable.
  CompositeWeightReader(const CompositeWeightReader &) = delete;
  CompositeWeightReader &operator=(const CompositeWeightReader &) = delete;

  // Reads open parenthesis from a stream if option selected.
  void ReadBegin();

  // Reads element from a stream. The second argument, when true, indicates that
  // this will be the last element (allowing more forgiving formatting of the
  // last element). Returns false when last element is read.
  template <class T>
  bool ReadElement(T *comp, bool last = false);

  // Finalizes reading.
  void ReadEnd();

 private:
  std::istream &istrm_;  // Input stream.
  int c_{0};             // Last character read, or EOF.
  int depth_{0};         // Weight parentheses depth.
};
// Reads one element of a composite weight into *comp.
//
// Starting with the current character c_, characters are collected until EOF,
// whitespace, a separator at parentheses depth <= 1 (ignored when last is
// true), or a close parenthesis at depth 1. The collected text is then parsed
// into *comp with operator>>.
//
// Returns true iff the character following the element's delimiter is neither
// EOF nor whitespace. On an unmatched close parenthesis or an empty element,
// logs an error, sets the stream state to badbit and returns false.
template <class T>
inline bool CompositeWeightReader::ReadElement(T *comp, bool last) {
  constexpr auto kEof = std::istream::traits_type::eof();
  const bool has_parens = open_paren_ != 0;
  std::string text;
  for (;;) {
    // End of input or whitespace always terminates the element.
    if (c_ == kEof || std::isspace(c_)) break;
    // A separator terminates it unless nested or reading the last element.
    if (c_ == separator_ && depth_ <= 1 && !last) break;
    // So does the close parenthesis of the outermost level.
    if (c_ == close_paren_ && depth_ == 1) break;
    text += static_cast<char>(c_);
    // If parentheses encountered before separator, they must be matched.
    if (has_parens) {
      if (c_ == open_paren_) {
        ++depth_;
      } else if (c_ == close_paren_) {
        // Failure on unmatched parentheses.
        if (depth_ == 0) {
          FSTERROR() << "CompositeWeightReader: Unmatched close paren: "
                     << "Is the fst_weight_parentheses flag set correctly?";
          istrm_.clear(std::ios::badbit);
          return false;
        }
        --depth_;
      }
    }
    c_ = istrm_.get();
  }
  if (text.empty()) {
    FSTERROR() << "CompositeWeightReader: Empty element: "
               << "Is the fst_weight_parentheses flag set correctly?";
    istrm_.clear(std::ios::badbit);
    return false;
  }
  std::istringstream element_strm(text);
  element_strm >> *comp;
  // Skips separator/close parenthesis.
  if (c_ != kEof && !std::isspace(c_)) c_ = istrm_.get();
  if (c_ == kEof) {
    // Clears fail bit if just EOF.
    if (!istrm_.bad()) istrm_.clear(std::ios::eofbit);
    return false;
  }
  return !std::isspace(c_);
}
} // namespace fst
#endif // FST_WEIGHT_H_
|