// Copyright 2005-2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the 'License'); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an 'AS IS' BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // General weight set and associated semiring operation definitions. #ifndef FST_WEIGHT_H_ #define FST_WEIGHT_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include DECLARE_string(fst_weight_parentheses); DECLARE_string(fst_weight_separator); namespace fst { // A semiring is specified by two binary operations Plus and Times and two // designated elements Zero and One with the following properties: // // Plus: associative, commutative, and has Zero as its identity. // // Times: associative and has identity One, distributes w.r.t. Plus, and // has Zero as an annihilator: // Times(Zero(), a) == Times(a, Zero()) = Zero(). // // A left semiring distributes on the left; a right semiring is similarly // defined. // // A Weight class must have binary functions Plus and Times and static member // functions Zero() and One() and these must form (at least) a left or right // semiring. // // In addition, the following should be defined for a Weight: // // Member: predicate on set membership. // // NoWeight: static member function that returns an element that is // not a set member; used to signal an error. // // >>: reads textual representation of a weight. // // <<: prints textual representation of a weight. 
//
//   Read(istream &istrm): reads binary representation of a weight.
//
//   Write(ostream &ostrm): writes binary representation of a weight.
//
//   Hash: maps weight to size_t.
//
//   ApproxEqual: approximate equality (for inexact weights)
//
//   Quantize: quantizes w.r.t. delta (for inexact weights)
//
//   Divide:
//     - In a left semiring, for all a, b, b', c:
//       if Times(a, b) = c, Divide(c, a, DIVIDE_LEFT) = b' and b'.Member(),
//       then Times(a, b') = c.
//     - In a right semiring, for all a, a', b, c:
//       if Times(a, b) = c, Divide(c, b, DIVIDE_RIGHT) = a' and a'.Member(),
//       then Times(a', b) = c.
//     - In a commutative semiring,
//       * for all a, c:
//         Divide(c, a, DIVIDE_ANY) = Divide(c, a, DIVIDE_LEFT)
//                                  = Divide(c, a, DIVIDE_RIGHT)
//       * for all a, b, b', c:
//         if Times(a, b) = c, Divide(c, a, DIVIDE_ANY) = b' and b'.Member(),
//         then Times(a, b') = c
//     - In the case where there exists no b such that c = Times(a, b), the
//       return value of Divide(c, a, DIVIDE_LEFT) is unspecified. Returning
//       Weight::NoWeight() is recommended but not required in order to
//       allow the most efficient implementation.
//     - All algorithms in this library only call Divide(c, a) when it is
//       guaranteed that there exists a b such that c = Times(a, b).
//
//   ReverseWeight: the type of the corresponding reverse weight.
//
//     Typically the same type as Weight for a (both left and right) semiring.
//     For the left string semiring, it is the right string semiring.
//
//   Reverse: a mapping from Weight to ReverseWeight s.t.
//
//     --> Reverse(Reverse(a)) = a
//     --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b))
//     --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a))
//     Typically the identity mapping in a (both left and right) semiring.
//     In the left string semiring, it maps to the reverse string in the right
//     string semiring.
//
//   Properties: specifies additional properties that hold:
//      LeftSemiring: indicates weights form a left semiring.
//      RightSemiring: indicates weights form a right semiring.
//      Commutative: for all a, b: Times(a,b) == Times(b, a)
//      Idempotent: for all a: Plus(a, a) == a.
//      Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
//
// User-defined weights and their corresponding operations SHOULD be
// defined in the same namespace, but SHOULD NOT be defined in the fst
// namespace. Defining them in fst would make the user code fragile
// to additions in fst. They will be found in another namespace
// via argument-dependent lookup.

// CONSTANT DEFINITIONS

// A representable float near .001.
inline constexpr float kDelta = 1.0F / 1024.0F;

// The constants below are single-bit flags; a weight type reports the
// properties it satisfies by OR-ing them together in W::Properties().

// For all a, b, c: Times(c, Plus(a, b)) = Plus(Times(c, a), Times(c, b)).
inline constexpr uint64_t kLeftSemiring = 0x0000000000000001ULL;

// For all a, b, c: Times(Plus(a, b), c) = Plus(Times(a, c), Times(b, c)).
inline constexpr uint64_t kRightSemiring = 0x0000000000000002ULL;

// Both the left and the right semiring bits.
inline constexpr uint64_t kSemiring = kLeftSemiring | kRightSemiring;

// For all a, b: Times(a, b) = Times(b, a).
inline constexpr uint64_t kCommutative = 0x0000000000000004ULL;

// For all a: Plus(a, a) = a.
inline constexpr uint64_t kIdempotent = 0x0000000000000008ULL;

// For all a, b: Plus(a, b) = a or Plus(a, b) = b.
inline constexpr uint64_t kPath = 0x0000000000000010ULL;

// For random weight generation: default number of distinct weights.
// This is also used for a few other weight generation defaults.
inline constexpr size_t kNumRandomWeights = 5;

// Weight property boolean constants needed for SFINAE.

// std::true_type iff W::Properties() has the kIdempotent bit set.
// W must provide a static Properties() usable in a constant expression.
template <class W>
using IsIdempotent = std::bool_constant<(W::Properties() & kIdempotent) != 0>;

// std::true_type iff W::Properties() has the kPath bit set.
// W must provide a static Properties() usable in a constant expression.
template <class W>
using IsPath = std::bool_constant<(W::Properties() & kPath) != 0>;

// Determines direction of division.
enum DivideType { DIVIDE_LEFT, // left division DIVIDE_RIGHT, // right division DIVIDE_ANY }; // division in a commutative semiring // NATURAL ORDER // // By definition: // // a <= b iff a + b = a // // The natural order is a negative partial order iff the semiring is // idempotent. It is trivially monotonic for plus. It is left // (resp. right) monotonic for times iff the semiring is left // (resp. right) distributive. It is a total order iff the semiring // has the path property. // // For more information, see: // // Mohri, M. 2002. Semiring framework and algorithms for shortest-distance // problems, Journal of Automata, Languages and // Combinatorics 7(3): 321-350, 2002. // // We define the strict version of this order below. // Requires W is idempotent. template struct NaturalLess { using Weight = W; static_assert(IsIdempotent::value, "W must be idempotent."); bool operator()(const Weight &w1, const Weight &w2) const { return w1 != w2 && Plus(w1, w2) == w1; } }; // Power is the iterated product for arbitrary semirings such that Power(w, 0) // is One() for the semiring, and Power(w, n) = Times(Power(w, n - 1), w). template Weight Power(const Weight &weight, size_t n) { auto result = Weight::One(); for (size_t i = 0; i < n; ++i) result = Times(result, weight); return result; } // Simple default adder class. Specializations might be more complex. template class Adder { public: Adder() : sum_(Weight::Zero()) {} explicit Adder(Weight w) : sum_(std::move(w)) {} Weight Add(const Weight &w) { sum_ = Plus(sum_, w); return sum_; } Weight Sum() const { return sum_; } void Reset(Weight w = Weight::Zero()) { sum_ = std::move(w); } private: Weight sum_; }; // General weight converter: raises error. template struct WeightConvert { W2 operator()(W1 w1) const { FSTERROR() << "WeightConvert: Can't convert weight from " << W1::Type() << " to " << W2::Type(); return W2::NoWeight(); } }; // Specialized weight converter to self. 
template struct WeightConvert { constexpr W operator()(W weight) const { return weight; } }; // General random weight generator: raises error. // // The standard interface is roughly: // // class WeightGenerate { // public: // explicit WeightGenerate(uint64_t seed = std::random_device()(), // bool allow_zero = true, // ...); // // MyWeight operator()() const; // }; // // Many weight generators also take trailing constructor arguments specifying // the number of random (unique) weights, the length of weights (e.g., for // string-based weights), etc. with sensible defaults template struct WeightGenerate { W operator()() const { FSTERROR() << "WeightGenerate: No random generator for " << W::Type(); return W::NoWeight(); } }; namespace internal { class CompositeWeightIO { public: CompositeWeightIO(); CompositeWeightIO(char separator, std::pair parentheses); std::pair parentheses() const { return {open_paren_, close_paren_}; } char separator() const { return separator_; } bool error() const { return error_; } protected: const char separator_; const char open_paren_; const char close_paren_; private: bool error_; }; } // namespace internal // Helper class for writing textual composite weights. class CompositeWeightWriter : public internal::CompositeWeightIO { public: // Uses configuration from flags (FST_FLAGS_fst_weight_separator, // FST_FLAGS_fst_weight_parentheses). explicit CompositeWeightWriter(std::ostream &ostrm); // parentheses defines the opening and closing parenthesis characters. // Set parentheses = {0, 0} to disable writing parenthesis. CompositeWeightWriter(std::ostream &ostrm, char separator, std::pair parentheses); CompositeWeightWriter(const CompositeWeightWriter &) = delete; CompositeWeightWriter &operator=(const CompositeWeightWriter &) = delete; // Writes open parenthesis to a stream if option selected. void WriteBegin(); // Writes element to a stream. 
template void WriteElement(const T &comp) { if (i_++ > 0) ostrm_ << separator_; ostrm_ << comp; } // Writes close parenthesis to a stream if option selected. void WriteEnd(); private: std::ostream &ostrm_; int i_ = 0; // Element position. }; // Helper class for reading textual composite weights. Elements are separated by // a separator character. There must be at least one element per textual // representation. Parentheses characters should be set if the composite // weights themselves contain composite weights to ensure proper parsing. class CompositeWeightReader : public internal::CompositeWeightIO { public: // Uses configuration from flags (FST_FLAGS_fst_weight_separator, // FST_FLAGS_fst_weight_parentheses). explicit CompositeWeightReader(std::istream &istrm); // parentheses defines the opening and closing parenthesis characters. // Set parentheses = {0, 0} to disable reading parenthesis. CompositeWeightReader(std::istream &istrm, char separator, std::pair parentheses); CompositeWeightReader(const CompositeWeightReader &) = delete; CompositeWeightReader &operator=(const CompositeWeightReader &) = delete; // Reads open parenthesis from a stream if option selected. void ReadBegin(); // Reads element from a stream. The second argument, when true, indicates that // this will be the last element (allowing more forgiving formatting of the // last element). Returns false when last element is read. template bool ReadElement(T *comp, bool last = false); // Finalizes reading. void ReadEnd(); private: std::istream &istrm_; // Input stream. int c_ = 0; // Last character read, or EOF. int depth_ = 0; // Weight parentheses depth. 
}; template inline bool CompositeWeightReader::ReadElement(T *comp, bool last) { std::string s; const bool has_parens = open_paren_ != 0; while ((c_ != std::istream::traits_type::eof()) && !std::isspace(c_) && (c_ != separator_ || depth_ > 1 || last) && (c_ != close_paren_ || depth_ != 1)) { s += c_; // If parentheses encountered before separator, they must be matched. if (has_parens && c_ == open_paren_) { ++depth_; } else if (has_parens && c_ == close_paren_) { // Failure on unmatched parentheses. if (depth_ == 0) { FSTERROR() << "CompositeWeightReader: Unmatched close paren: " << "Is the fst_weight_parentheses flag set correctly?"; istrm_.clear(std::ios::badbit); return false; } --depth_; } c_ = istrm_.get(); } if (s.empty()) { FSTERROR() << "CompositeWeightReader: Empty element: " << "Is the fst_weight_parentheses flag set correctly?"; istrm_.clear(std::ios::badbit); return false; } std::istringstream istrm(s); istrm >> *comp; // Skips separator/close parenthesis. if (c_ != std::istream::traits_type::eof() && !std::isspace(c_)) { c_ = istrm_.get(); } const bool is_eof = c_ == std::istream::traits_type::eof(); // Clears fail bit if just EOF. if (is_eof && !istrm_.bad()) istrm_.clear(std::ios::eofbit); return !is_eof && !std::isspace(c_); } } // namespace fst #endif // FST_WEIGHT_H_