You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

208 lines
6.6 KiB

  1. // fstext/kaldi-fst-io-inl.h
  2. // Copyright 2009-2011 Microsoft Corporation
  3. // 2012-2015 Johns Hopkins University (Author: Daniel Povey)
  4. // 2013 Guoguo Chen
  5. // See ../../COPYING for clarification regarding multiple authors
  6. //
  7. // Licensed under the Apache License, Version 2.0 (the "License");
  8. // you may not use this file except in compliance with the License.
  9. // You may obtain a copy of the License at
  10. //
  11. // http://www.apache.org/licenses/LICENSE-2.0
  12. //
  13. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  15. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  16. // MERCHANTABLITY OR NON-INFRINGEMENT.
  17. // See the Apache 2 License for the specific language governing permissions and
  18. // limitations under the License.
  19. #ifndef KALDI_FSTEXT_KALDI_FST_IO_INL_H_
  20. #define KALDI_FSTEXT_KALDI_FST_IO_INL_H_
  21. #include <string>
  22. #include <vector>
  23. #include "util/text-utils.h"
  24. namespace fst {
  25. template <class Arc>
  26. void WriteFstKaldi(std::ostream& os, bool binary, const VectorFst<Arc>& t) {
  27. bool ok;
  28. if (binary) {
  29. // Binary-mode writing.
  30. ok = t.Write(os, FstWriteOptions());
  31. } else {
  32. // Text-mode output. Note: we expect that t.InputSymbols() and
  33. // t.OutputSymbols() would always return NULL. The corresponding input
  34. // routine would not work if the FST actually had symbols attached. Write a
  35. // newline to start the FST; in a table, the first line of the FST will
  36. // appear on its own line.
  37. os << '\n';
  38. bool acceptor = false, write_one = false;
  39. FstPrinter<Arc> printer(t, t.InputSymbols(), t.OutputSymbols(), NULL,
  40. acceptor, write_one, "\t");
  41. printer.Print(&os, "<unknown>");
  42. if (os.fail()) KALDI_ERR << "Stream failure detected writing FST to stream";
  43. // Write another newline as a terminating character. The read routine will
  44. // detect this [this is a Kaldi mechanism, not something in the original
  45. // OpenFst code].
  46. os << '\n';
  47. ok = os.good();
  48. }
  49. if (!ok) {
  50. KALDI_ERR << "Error writing FST to stream";
  51. }
  52. }
  // Helper for ReadFstKaldi: parses the string `s` into the weight `*w` using
  // the weight type's stream extraction operator.
  //
  // Returns false if extraction fails, or if `allow_zero` is false and the
  // parsed value equals W::Zero(); returns true otherwise.
  template <class W>
  inline bool StrToWeight(const std::string& s, bool allow_zero, W* w) {
    std::istringstream parser(s);
    if (!(parser >> *w)) return false;  // extraction set the stream's fail state.
    if (allow_zero) return true;
    return !(*w == W::Zero());
  }
  63. template <class Arc>
  64. void ReadFstKaldi(std::istream& is, bool binary, VectorFst<Arc>* fst) {
  65. typedef typename Arc::Weight Weight;
  66. typedef typename Arc::StateId StateId;
  67. if (binary) {
  68. // We don't have access to the filename here, so write [unknown].
  69. VectorFst<Arc>* ans =
  70. VectorFst<Arc>::Read(is, fst::FstReadOptions(std::string("[unknown]")));
  71. if (ans == NULL) {
  72. KALDI_ERR << "Error reading FST from stream.";
  73. }
  74. *fst = *ans; // shallow copy.
  75. delete ans;
  76. } else {
  77. // Consume the \r on Windows, the \n that the text-form FST format starts
  78. // with, and any extra spaces that might have got in there somehow.
  79. while (std::isspace(is.peek()) && is.peek() != '\n') is.get();
  80. if (is.peek() == '\n') {
  81. is.get(); // consume the newline.
  82. } else { // saw spaces but no newline.. this is not expected.
  83. KALDI_ERR << "Reading FST: unexpected sequence of spaces "
  84. << " at file position " << is.tellg();
  85. }
  86. using kaldi::ConvertStringToInteger;
  87. using kaldi::SplitStringToIntegers;
  88. using std::string;
  89. using std::vector;
  90. fst->DeleteStates();
  91. string line;
  92. size_t nline = 0;
  93. string separator = FLAGS_fst_field_separator + "\r\n";
  94. while (std::getline(is, line)) {
  95. nline++;
  96. vector<string> col;
  97. // on Windows we'll write in text and read in binary mode.
  98. kaldi::SplitStringToVector(line, separator.c_str(), true, &col);
  99. if (col.size() == 0) break; // Empty line is a signal to stop, in our
  100. // archive format.
  101. if (col.size() > 5) {
  102. KALDI_ERR << "Bad line in FST: " << line;
  103. }
  104. StateId s;
  105. if (!ConvertStringToInteger(col[0], &s)) {
  106. KALDI_ERR << "Bad line in FST: " << line;
  107. }
  108. while (s >= fst->NumStates()) fst->AddState();
  109. if (nline == 1) fst->SetStart(s);
  110. bool ok = true;
  111. Arc arc;
  112. Weight w;
  113. StateId d = s;
  114. switch (col.size()) {
  115. case 1:
  116. fst->SetFinal(s, Weight::One());
  117. break;
  118. case 2:
  119. if (!StrToWeight(col[1], true, &w))
  120. ok = false;
  121. else
  122. fst->SetFinal(s, w);
  123. break;
  124. case 3: // 3 columns not ok for Lattice format; it's not an acceptor.
  125. ok = false;
  126. break;
  127. case 4:
  128. ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
  129. ConvertStringToInteger(col[2], &arc.ilabel) &&
  130. ConvertStringToInteger(col[3], &arc.olabel);
  131. if (ok) {
  132. d = arc.nextstate;
  133. arc.weight = Weight::One();
  134. fst->AddArc(s, arc);
  135. }
  136. break;
  137. case 5:
  138. ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
  139. ConvertStringToInteger(col[2], &arc.ilabel) &&
  140. ConvertStringToInteger(col[3], &arc.olabel) &&
  141. StrToWeight(col[4], false, &arc.weight);
  142. if (ok) {
  143. d = arc.nextstate;
  144. fst->AddArc(s, arc);
  145. }
  146. break;
  147. default:
  148. ok = false;
  149. }
  150. while (d >= fst->NumStates()) fst->AddState();
  151. if (!ok) KALDI_ERR << "Bad line in FST: " << line;
  152. }
  153. }
  154. }
  155. template <class Arc> // static
  156. bool VectorFstTplHolder<Arc>::Write(std::ostream& os, bool binary, const T& t) {
  157. try {
  158. WriteFstKaldi(os, binary, t);
  159. return true;
  160. } catch (...) {
  161. return false;
  162. }
  163. }
  164. template <class Arc> // static
  165. bool VectorFstTplHolder<Arc>::Read(std::istream& is) {
  166. Clear();
  167. int c = is.peek();
  168. if (c == -1) {
  169. KALDI_WARN << "End of stream detected reading Fst";
  170. return false;
  171. } else if (isspace(c)) { // The text form of the FST begins
  172. // with space (normally, '\n'), so this means it's text (the binary form
  173. // cannot begin with space because it starts with the FST Type() which is
  174. // not space).
  175. try {
  176. t_ = new VectorFst<Arc>();
  177. ReadFstKaldi(is, false, t_);
  178. } catch (...) {
  179. Clear();
  180. return false;
  181. }
  182. } else { // reading a binary FST.
  183. try {
  184. t_ = new VectorFst<Arc>();
  185. ReadFstKaldi(is, true, t_);
  186. } catch (...) {
  187. Clear();
  188. return false;
  189. }
  190. }
  191. return true;
  192. }
  193. } // namespace fst.
  194. #endif // KALDI_FSTEXT_KALDI_FST_IO_INL_H_