You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

114 lines
4.1 KiB

  1. // fstbin/fstdeterminizestar.cc
  2. // Copyright 2009-2011 Microsoft Corporation
  3. // See ../../COPYING for clarification regarding multiple authors
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License");
  6. // you may not use this file except in compliance with the License.
  7. // You may obtain a copy of the License at
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  12. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  13. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  14. // MERCHANTABLITY OR NON-INFRINGEMENT.
  15. // See the Apache 2 License for the specific language governing permissions and
  16. // limitations under the License.
  17. #include "base/kaldi-common.h"
  18. #include "fst/fstlib.h"
  19. #include "fstext/determinize-star.h"
  20. #include "fstext/fstext-utils.h"
  21. #include "fstext/kaldi-fst-io.h"
  22. #include "util/parse-options.h"
  23. #if !defined(_MSC_VER) && !defined(__APPLE__)
  24. #include <signal.h> // Comment this line and the call to signal below if
  25. // it causes compilation problems. It is only to enable a debugging procedure
  26. // when determinization does not terminate. We are disabling this code if
  27. // compiling on Windows because signal.h is not available there, and on
  28. // MacOS due to a problem with <signal.h> in the initial release of Sierra.
  29. #endif
  /* some test examples:
   ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint
   ( echo "0 0 1 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint
   ( echo "0 0 1 0"; echo "0 1 1 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint
   # this last one fails [correctly]:
   ( echo "0 0 0 1"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint

   cd ~/tmpdir
   while true; do
     fstrand > 1.fst
     fstpredeterminize out.lst 1.fst | fstdeterminizestar | fstrmsymbols out.lst > 2.fst
     fstequivalent --random=true 1.fst 2.fst || echo "Test failed"
     echo -n "."
   done

   Test of debugging [with non-determinizable input]:
   ( echo " 0 0 1 0 1.0"; echo "0 1 1 0"; echo "1 1 1 0 0"; echo "0 2 2 0"; echo "2"; echo "1" ) | fstcompile | fstdeterminizestar
   kill -SIGUSR1 [the process-id of fstdeterminizestar]
   # prints out a bunch of debugging output showing the mess it got itself into.
  */
  // Flag flipped to true by signal_handler(). main() passes its address to the
  // determinization routines as their debug pointer.
  bool debug_location = false;

  // Signal callback: records that a signal arrived by raising debug_location.
  // The signal number is ignored.
  void signal_handler(int /*signum*/) {
    debug_location = true;
  }
  50. int main(int argc, char* argv[]) {
  51. try {
  52. using namespace kaldi; // NOLINT
  53. using namespace fst; // NOLINT
  54. using kaldi::int32;
  55. const char* usage =
  56. "Removes epsilons and determinizes in one step\n"
  57. "\n"
  58. "Usage: fstdeterminizestar [in.fst [out.fst] ]\n"
  59. "\n"
  60. "See also: fstdeterminizelog, lattice-determinize\n";
  61. float delta = kDelta;
  62. int max_states = -1;
  63. bool use_log = false;
  64. ParseOptions po(usage);
  65. po.Register("use-log", &use_log, "Determinize in log semiring.");
  66. po.Register("delta", &delta,
  67. "Delta value used to determine equivalence of weights.");
  68. po.Register(
  69. "max-states", &max_states,
  70. "Maximum number of states in determinized FST before it will abort.");
  71. po.Read(argc, argv);
  72. if (po.NumArgs() > 2) {
  73. po.PrintUsage();
  74. exit(1);
  75. }
  76. std::string fst_in_str = po.GetOptArg(1), fst_out_str = po.GetOptArg(2);
  77. // This enables us to get traceback info from determinization that is
  78. // not seeming to terminate.
  79. #if !defined(_MSC_VER) && !defined(__APPLE__)
  80. signal(SIGUSR1, signal_handler);
  81. #endif
  82. // Normal case: just files.
  83. VectorFst<StdArc>* fst = ReadFstKaldi(fst_in_str);
  84. ArcSort(fst, ILabelCompare<StdArc>()); // improves speed.
  85. if (use_log) {
  86. DeterminizeStarInLog(fst, delta, &debug_location, max_states);
  87. } else {
  88. VectorFst<StdArc> det_fst;
  89. DeterminizeStar(*fst, &det_fst, delta, &debug_location, max_states);
  90. *fst = det_fst; // will do shallow copy and then det_fst goes
  91. // out of scope anyway.
  92. }
  93. WriteFstKaldi(*fst, fst_out_str);
  94. delete fst;
  95. return 0;
  96. } catch (const std::exception& e) {
  97. std::cerr << e.what();
  98. return -1;
  99. }
  100. }