You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

158 lines
5.9 KiB

  1. // fstext/kaldi-fst-io.h
  2. // Copyright 2009-2011 Microsoft Corporation
  3. // 2012-2015 Johns Hopkins University (Author: Daniel Povey)
  4. // 2013 Guoguo Chen
  5. // See ../../COPYING for clarification regarding multiple authors
  6. //
  7. // Licensed under the Apache License, Version 2.0 (the "License");
  8. // you may not use this file except in compliance with the License.
  9. // You may obtain a copy of the License at
  10. //
  11. // http://www.apache.org/licenses/LICENSE-2.0
  12. //
  13. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  15. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  16. // MERCHANTABLITY OR NON-INFRINGEMENT.
  17. // See the Apache 2 License for the specific language governing permissions and
  18. // limitations under the License.
  19. #ifndef KALDI_FSTEXT_KALDI_FST_IO_H_
  20. #define KALDI_FSTEXT_KALDI_FST_IO_H_
  21. #include <string>
  22. #include <utility>
  23. #include "fst/fst-decl.h"
  24. #include "fst/fstlib.h"
  25. #include "fst/script/print-impl.h"
  26. #include "base/kaldi-common.h"
  27. // Some functions for writing Fsts.
  28. // I/O for FSTs is a bit of a mess, and not very well integrated with Kaldi's
  29. // generic I/O mechanisms, because we want files containing just FSTs to
  30. // be readable by OpenFST's native binaries, which is not compatible
  31. // with the normal \0B header that identifies Kaldi files as containing
  32. // binary data.
  33. // So use the functions here with your eyes open, and with caution!
  34. namespace fst {
  35. // Reads a binary FST from 'rxfilename' using Kaldi I/O mechanisms (pipes, etc.).
  36. // On error returns NULL. Only supports VectorFst<StdArc> and exists
  37. // mainly for backward code compatibility.
  38. VectorFst<StdArc>* ReadFstKaldi(std::string rxfilename);
  39. // Reads a binary FST from 'rxfilename' using Kaldi I/O mechanisms (pipes, etc.).
  40. // If it can't read the FST and throw_on_err == true (the default), it throws
  41. // using KALDI_ERR; otherwise it prints a warning and returns (presumably NULL --
  42. // TODO confirm against the definition). Note: this doesn't support the
  43. // text-mode option that we generally like to support. This version currently
  44. // supports ConstFst<StdArc> or VectorFst<StdArc> (const-fst can give better
  45. // performance for decoding). Other types could also be loaded if registered inside OpenFst.
  46. Fst<StdArc>* ReadFstKaldiGeneric(std::string rxfilename,
  47. bool throw_on_err = true);
  48. // This function attempts to dynamic_cast the pointer 'fst' (which will likely
  49. // have been returned by ReadFstKaldiGeneric()) to the more derived
  50. // type VectorFst<StdArc>. If this succeeds, it returns the same pointer;
  51. // if it fails, it converts the FST type (by creating a new VectorFst<StdArc>
  52. // initialized by 'fst'), prints a warning, and deletes 'fst' (presumably returning the new object -- TODO confirm).
  53. VectorFst<StdArc>* CastOrConvertToVectorFst(Fst<StdArc>* fst);
  54. // Version of ReadFstKaldi() that writes to the pointer 'ofst' rather than returning a new FST. Assumes
  55. // the FST is binary with no binary marker. Crashes on error.
  56. void ReadFstKaldi(std::string rxfilename, VectorFst<StdArc>* ofst);
  57. // Writes the FST 'fst' to 'wxfilename' using Kaldi I/O mechanisms (pipes, etc.).
  58. // On error, throws using KALDI_ERR. For use only in code in fstbin/,
  59. // as it doesn't support the text-mode option.
  60. void WriteFstKaldi(const VectorFst<StdArc>& fst, std::string wxfilename);
  61. // This is a more general Kaldi-type-IO mechanism for writing FSTs to
  62. // streams, supporting binary or text-mode writing (see the 'binary' flag). (Note: we just
  63. // write the integers; symbol tables are not supported.)
  64. // On error, throws using KALDI_ERR.
  65. template <class Arc>
  66. void WriteFstKaldi(std::ostream& os, bool binary, const VectorFst<Arc>& fst);
  67. // A generic Kaldi-type-IO mechanism for reading FSTs from streams,
  68. // supporting binary or text-mode reading/writing (see the 'binary' flag); takes the destination as the pointer 'fst'.
  69. template <class Arc>
  70. void ReadFstKaldi(std::istream& is, bool binary, VectorFst<Arc>* fst);
  71. // Reads an FST file for an LM (G.fst) from 'rxfilename', makes it an acceptor,
  72. // and makes sure it is sorted on labels.
  73. fst::VectorFst<fst::StdArc>* ReadAndPrepareLmFst(std::string rxfilename);
  74. // This is a Holder class with T = VectorFst<Arc>, that meets the requirements
  75. // of a Holder class as described in ../util/kaldi-holder.h. This enables us to
  76. // read/write collections of FSTs indexed by strings, using the Table concept (
  77. // see ../util/kaldi-table.h).
  78. // Originally it was only templated on T = VectorFst<StdArc>, but as the keyword
  79. // spotting stuff introduced more types of FSTs, we made it also templated on
  80. // the arc.
  81. template <class Arc>
  82. class VectorFstTplHolder {
  83. public:
  84. typedef VectorFst<Arc> T;
  85. VectorFstTplHolder() : t_(NULL) {}
  86. static bool Write(std::ostream& os, bool binary, const T& t);
  87. void Copy(const T& t) { // copies it into the holder.
  88. Clear();
  89. t_ = new T(t);
  90. }
  91. // Reads into the holder.
  92. bool Read(std::istream& is);
  93. // It's potentially a binary format, so must read in binary mode (linefeed
  94. // translation will corrupt the file. We don't know till we open the file if
  95. // it's really binary, so we need to read in binary mode to be on the safe
  96. // side. Extra linefeeds won't matter, the text-mode reading code ignores
  97. // them.
  98. static bool IsReadInBinary() { return true; }
  99. T& Value() {
  100. // code error if !t_.
  101. if (!t_) KALDI_ERR << "VectorFstTplHolder::Value() called wrongly.";
  102. return *t_;
  103. }
  104. void Clear() {
  105. if (t_) {
  106. delete t_;
  107. t_ = NULL;
  108. }
  109. }
  110. void Swap(VectorFstTplHolder<Arc>* other) { std::swap(t_, other->t_); }
  111. bool ExtractRange(const VectorFstTplHolder<Arc>& other,
  112. const std::string& range) {
  113. KALDI_ERR << "ExtractRange is not defined for this type of holder.";
  114. return false;
  115. }
  116. ~VectorFstTplHolder() { Clear(); }
  117. // No destructor. Assignment and
  118. // copy constructor take their default implementations.
  119. private:
  120. KALDI_DISALLOW_COPY_AND_ASSIGN(VectorFstTplHolder);
  121. T* t_;
  122. };
  123. // Now make the original VectorFstHolder a typedef of
  124. // VectorFstTplHolder<StdArc>.
  125. typedef VectorFstTplHolder<StdArc> VectorFstHolder;
  126. } // end namespace fst
  127. #include "fstext/kaldi-fst-io-inl.h"
  128. #endif // KALDI_FSTEXT_KALDI_FST_IO_H_