You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

148 lines
5.4 KiB

  1. // fstext/kaldi-fst-io.cc
  2. // Copyright 2009-2011 Microsoft Corporation
  3. // 2012-2015 Johns Hopkins University (Author: Daniel Povey)
  4. // 2013 Guoguo Chen
  5. // See ../../COPYING for clarification regarding multiple authors
  6. //
  7. // Licensed under the Apache License, Version 2.0 (the "License");
  8. // you may not use this file except in compliance with the License.
  9. // You may obtain a copy of the License at
  10. //
  11. // http://www.apache.org/licenses/LICENSE-2.0
  12. //
  13. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  15. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  16. // MERCHANTABLITY OR NON-INFRINGEMENT.
  17. // See the Apache 2 License for the specific language governing permissions and
  18. // limitations under the License.
  19. #include "fstext/kaldi-fst-io.h"
  20. #include <string>
  21. #include "base/kaldi-error.h"
  22. #include "base/kaldi-math.h"
  23. #include "util/kaldi-io.h"
  24. namespace fst {
  25. VectorFst<StdArc>* ReadFstKaldi(std::string rxfilename) {
  26. if (rxfilename == "") rxfilename = "-"; // interpret "" as stdin,
  27. // for compatibility with OpenFst conventions.
  28. kaldi::Input ki(rxfilename);
  29. fst::FstHeader hdr;
  30. if (!hdr.Read(ki.Stream(), rxfilename))
  31. KALDI_ERR << "Reading FST: error reading FST header from "
  32. << kaldi::PrintableRxfilename(rxfilename);
  33. FstReadOptions ropts("<unspecified>", &hdr);
  34. VectorFst<StdArc>* fst = VectorFst<StdArc>::Read(ki.Stream(), ropts);
  35. if (!fst)
  36. KALDI_ERR << "Could not read fst from "
  37. << kaldi::PrintableRxfilename(rxfilename);
  38. return fst;
  39. }
  40. // Register const fst to load it automatically. Other types like
  41. // olabel_lookahead or ngram or compact_fst should be registered
  42. // through OpenFst registration API.
  43. static fst::FstRegisterer<VectorFst<StdArc>> VectorFst_StdArc_registerer;
  44. static fst::FstRegisterer<ConstFst<StdArc>> ConstFst_StdArc_registerer;
  45. Fst<StdArc>* ReadFstKaldiGeneric(std::string rxfilename, bool throw_on_err) {
  46. if (rxfilename == "") rxfilename = "-"; // interpret "" as stdin,
  47. // for compatibility with OpenFst conventions.
  48. kaldi::Input ki(rxfilename);
  49. fst::FstHeader hdr;
  50. // Read FstHeader which contains the type of FST
  51. if (!hdr.Read(ki.Stream(), rxfilename)) {
  52. if (throw_on_err) {
  53. KALDI_ERR << "Reading FST: error reading FST header from "
  54. << kaldi::PrintableRxfilename(rxfilename);
  55. } else {
  56. KALDI_WARN << "We fail to read FST header from "
  57. << kaldi::PrintableRxfilename(rxfilename)
  58. << ". A NULL pointer is returned.";
  59. return NULL;
  60. }
  61. }
  62. // Check the type of Arc
  63. if (hdr.ArcType() != fst::StdArc::Type()) {
  64. if (throw_on_err) {
  65. KALDI_ERR << "FST with arc type " << hdr.ArcType()
  66. << " is not supported.";
  67. } else {
  68. KALDI_WARN << "Fst with arc type" << hdr.ArcType()
  69. << " is not supported. A NULL pointer is returned.";
  70. return NULL;
  71. }
  72. }
  73. // Read the FST
  74. FstReadOptions ropts("<unspecified>", &hdr);
  75. Fst<StdArc>* fst = Fst<StdArc>::Read(ki.Stream(), ropts);
  76. if (!fst) {
  77. if (throw_on_err) {
  78. KALDI_ERR << "Could not read fst from "
  79. << kaldi::PrintableRxfilename(rxfilename);
  80. } else {
  81. KALDI_WARN << "Could not read fst from "
  82. << kaldi::PrintableRxfilename(rxfilename)
  83. << ". A NULL pointer is returned.";
  84. return NULL;
  85. }
  86. }
  87. return fst;
  88. }
  89. VectorFst<StdArc>* CastOrConvertToVectorFst(Fst<StdArc>* fst) {
  90. // This version currently supports ConstFst<StdArc> or VectorFst<StdArc>
  91. std::string real_type = fst->Type();
  92. KALDI_ASSERT(real_type == "vector" || real_type == "const");
  93. if (real_type == "vector") {
  94. return dynamic_cast<VectorFst<StdArc>*>(fst);
  95. } else {
  96. // As the 'fst' can't cast to VectorFst, we create a new
  97. // VectorFst<StdArc> initialized by 'fst', and delete 'fst'.
  98. VectorFst<StdArc>* new_fst = new VectorFst<StdArc>(*fst);
  99. delete fst;
  100. return new_fst;
  101. }
  102. }
  103. void ReadFstKaldi(std::string rxfilename, fst::StdVectorFst* ofst) {
  104. fst::StdVectorFst* fst = ReadFstKaldi(rxfilename);
  105. *ofst = *fst;
  106. delete fst;
  107. }
  108. void WriteFstKaldi(const VectorFst<StdArc>& fst, std::string wxfilename) {
  109. if (wxfilename == "") wxfilename = "-"; // interpret "" as stdout,
  110. // for compatibility with OpenFst conventions.
  111. bool write_binary = true, write_header = false;
  112. kaldi::Output ko(wxfilename, write_binary, write_header);
  113. FstWriteOptions wopts(kaldi::PrintableWxfilename(wxfilename));
  114. fst.Write(ko.Stream(), wopts);
  115. }
  116. fst::VectorFst<fst::StdArc>* ReadAndPrepareLmFst(std::string rxfilename) {
  117. // ReadFstKaldi() will die with exception on failure.
  118. fst::VectorFst<fst::StdArc>* ans = fst::ReadFstKaldi(rxfilename);
  119. if (ans->Properties(fst::kAcceptor, true) == 0) {
  120. // If it's not already an acceptor, project on the output, i.e. copy olabels
  121. // to ilabels. Generally the G.fst's on disk will have the disambiguation
  122. // symbol #0 on the input symbols of the backoff arc, and projection will
  123. // replace them with epsilons which is what is on the output symbols of
  124. // those arcs.
  125. fst::Project(ans, fst::PROJECT_OUTPUT);
  126. }
  127. if (ans->Properties(fst::kILabelSorted, true) == 0) {
  128. // Make sure LM is sorted on ilabel.
  129. fst::ILabelCompare<fst::StdArc> ilabel_comp;
  130. fst::ArcSort(ans, ilabel_comp);
  131. }
  132. return ans;
  133. }
  134. } // end namespace fst