You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

98 lines
3.9 KiB

  1. // fstext/pre-determinize.h
  2. // Copyright 2009-2011 Microsoft Corporation
  3. // See ../../COPYING for clarification regarding multiple authors
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License");
  6. // you may not use this file except in compliance with the License.
  7. // You may obtain a copy of the License at
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  12. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  13. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  14. // MERCHANTABLITY OR NON-INFRINGEMENT.
  15. // See the Apache 2 License for the specific language governing permissions and
  16. // limitations under the License.
  17. #ifndef KALDI_FSTEXT_PRE_DETERMINIZE_H_
  18. #define KALDI_FSTEXT_PRE_DETERMINIZE_H_
  19. #include <fst/fst-decl.h>
  20. #include <fst/fstlib.h>
  21. #include <algorithm>
  22. #include <map>
  23. #include <set>
  24. #include <string>
  25. #include <vector>
  26. #include "base/kaldi-common.h"
  27. namespace fst {
  28. /* PreDeterminize modifies 'fst' in place, inserting extra symbols on its input
  29. side as necessary to ensure that, after epsilon removal, it will be compactly
  30. determinizable by the determinize* algorithm. (By compactly determinizable we
  31. mean that no original FST state is represented in more than one determinized state.)
  32. Caution: this code is now only used in testing.
  33. The new symbols start from the value "first_new_symbol", which should be
  34. higher than the largest-numbered symbol currently in the FST. The new
  35. symbols added are put in the array syms_out, which should be empty at start.
  36. */
  37. template <class Arc, class Int>
  38. void PreDeterminize(MutableFst<Arc>* fst, typename Arc::Label first_new_symbol,
  39. std::vector<Int>* syms_out);
  40. /* CreateNewSymbols is a helper function used inside PreDeterminize, and is also
  41. useful when you need to add a number of extra symbols to a different
  42. vocabulary from the one modified by PreDeterminize. NOTE(review): presumably nSym is the number of symbols to add to inputSymTable, named using 'prefix', with their labels written to syms_out -- confirm against the definition. */
  43. template <class Label>
  44. void CreateNewSymbols(SymbolTable* inputSymTable, int nSym, std::string prefix,
  45. std::vector<Label>* syms_out);
  46. /** AddSelfLoops is a function you will probably want to use alongside
  47. PreDeterminize, to add self-loops to any FSTs that you compose on the left
  48. hand side of the one modified by PreDeterminize.
  49. This function inserts loops with "special symbols" [e.g. \#0, \#1] into an
  50. FST. This is done at each final state and each state with non-epsilon output
  51. symbols on at least one arc out of it. This is to ensure that these symbols,
  52. when inserted into the input side of an FST we will compose with on the
  53. right, can "pass through" this FST.
  54. At input, isyms and osyms must be vectors of the same size n, corresponding
  55. to symbols that currently do not exist in 'fst'. For each state in 'fst' that
  56. has non-epsilon symbols on the output side of arcs leaving it, or which is a
  57. final state, this function inserts n self-loops with unit weight, the i'th
  58. loop carrying isyms[i] on its input and osyms[i] on its output.
  59. */
  60. template <class Arc>
  61. void AddSelfLoops(MutableFst<Arc>* fst,
  62. const std::vector<typename Arc::Label>& isyms,
  63. const std::vector<typename Arc::Label>& osyms);
  64. /* DeleteISymbols replaces any instances of symbols in the vector symsIn,
  65. appearing on the input side of 'fst', with epsilon. */
  66. /* It returns the number of symbol instances that were replaced (deleted). */
  67. template <class Arc>
  68. int64 DeleteISymbols(MutableFst<Arc>* fst,
  69. std::vector<typename Arc::Label> symsIn);
  70. /* CreateSuperFinal takes an FST, and creates an equivalent FST with a single
  71. final state with no transitions out and unit final weight, by inserting
  72. epsilon transitions as necessary. NOTE(review): the returned StateId is presumably that of the single final state -- confirm against the definition. */
  73. template <class Arc>
  74. typename Arc::StateId CreateSuperFinal(MutableFst<Arc>* fst);
  75. } // end namespace fst
  76. #include "fstext/pre-determinize-inl.h"
  77. #endif // KALDI_FSTEXT_PRE_DETERMINIZE_H_