You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

144 lines
4.8 KiB

  1. // Copyright 2005-2024 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the 'License');
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an 'AS IS' BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // See www.openfst.org for extensive documentation on this weighted
  16. // finite-state transducer library.
  17. //
  18. // Stand-alone class to print out binary FSTs in the AT&T format, a helper
  19. // class for fstprint.cc.
  20. #ifndef FST_SCRIPT_PRINT_IMPL_H_
  21. #define FST_SCRIPT_PRINT_IMPL_H_
  22. #include <ostream>
  23. #include <sstream>
  24. #include <string>
  25. #include <fst/log.h>
  26. #include <fst/fst.h>
  27. #include <fst/fstlib.h>
  28. #include <fst/properties.h>
  29. #include <fst/symbol-table.h>
  30. #include <fst/util.h>
  31. #include <string_view>
  32. namespace fst {
  33. // Print a binary FST in textual format (helper class for fstprint.cc).
  34. // WARNING: Stand-alone use of this class not recommended, most code should
  35. // read/write using the binary format which is much more efficient.
  36. template <class Arc>
  37. class FstPrinter {
  38. public:
  39. using StateId = typename Arc::StateId;
  40. using Label = typename Arc::Label;
  41. using Weight = typename Arc::Weight;
  42. explicit FstPrinter(const Fst<Arc> &fst, const SymbolTable *isyms,
  43. const SymbolTable *osyms, const SymbolTable *ssyms,
  44. bool accept, bool show_weight_one,
  45. std::string_view field_separator,
  46. std::string_view missing_symbol = "")
  47. : fst_(fst),
  48. isyms_(isyms),
  49. osyms_(osyms),
  50. ssyms_(ssyms),
  51. accept_(accept && (fst.Properties(kAcceptor, true) == kAcceptor)),
  52. show_weight_one_(show_weight_one),
  53. sep_(field_separator),
  54. missing_symbol_(missing_symbol) {}
  55. // Prints FST to an output stream.
  56. void Print(std::ostream &ostrm, std::string_view dest) {
  57. dest_ = std::string(dest);
  58. const auto start = fst_.Start();
  59. if (start == kNoStateId) return;
  60. // Initial state first.
  61. PrintState(ostrm, start);
  62. for (StateIterator<Fst<Arc>> siter(fst_); !siter.Done(); siter.Next()) {
  63. const auto s = siter.Value();
  64. if (s != start) PrintState(ostrm, s);
  65. }
  66. }
  67. private:
  68. std::string FormatId(StateId id, const SymbolTable *syms) const {
  69. if (syms) {
  70. std::string symbol = syms->Find(id);
  71. if (symbol.empty()) {
  72. if (missing_symbol_.empty()) {
  73. FSTERROR() << "FstPrinter: Integer " << id
  74. << " is not mapped to any textual symbol"
  75. << ", symbol table = " << syms->Name()
  76. << ", destination = " << dest_;
  77. symbol = "?";
  78. } else {
  79. symbol = missing_symbol_;
  80. }
  81. }
  82. return symbol;
  83. } else {
  84. return std::to_string(id);
  85. }
  86. }
  87. std::string FormatStateId(StateId s) const { return FormatId(s, ssyms_); }
  88. std::string FormatILabel(Label l) const { return FormatId(l, isyms_); }
  89. std::string FormatOLabel(Label l) const { return FormatId(l, osyms_); }
  90. void PrintState(std::ostream &ostrm, StateId s) const {
  91. bool output = false;
  92. for (ArcIterator<Fst<Arc>> aiter(fst_, s); !aiter.Done(); aiter.Next()) {
  93. const auto &arc = aiter.Value();
  94. ostrm << FormatStateId(s) << sep_ << FormatStateId(arc.nextstate)
  95. << sep_ << FormatILabel(arc.ilabel);
  96. if (!accept_) {
  97. ostrm << sep_ << FormatOLabel(arc.olabel);
  98. }
  99. if (show_weight_one_ || arc.weight != Weight::One()) {
  100. ostrm << sep_ << arc.weight;
  101. }
  102. ostrm << "\n";
  103. output = true;
  104. }
  105. const auto weight = fst_.Final(s);
  106. if (weight != Weight::Zero() || !output) {
  107. ostrm << FormatStateId(s);
  108. if (show_weight_one_ || weight != Weight::One()) {
  109. ostrm << sep_ << weight;
  110. }
  111. ostrm << "\n";
  112. }
  113. }
  114. const Fst<Arc> &fst_;
  115. const SymbolTable *isyms_; // ilabel symbol table.
  116. const SymbolTable *osyms_; // olabel symbol table.
  117. const SymbolTable *ssyms_; // slabel symbol table.
  118. bool accept_; // Print as acceptor when possible?
  119. std::string dest_; // Text FST destination name.
  120. bool show_weight_one_; // Print weights equal to Weight::One()?
  121. std::string sep_; // Separator character between fields.
  122. std::string missing_symbol_; // Symbol to print when lookup fails (default
  123. // "" means raise error).
  124. FstPrinter(const FstPrinter &) = delete;
  125. FstPrinter &operator=(const FstPrinter &) = delete;
  126. };
  127. } // namespace fst
  128. #endif // FST_SCRIPT_PRINT_IMPL_H_