You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

81 lines
2.8 KiB

  1. // Copyright 2005-2024 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the 'License');
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an 'AS IS' BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // See www.openfst.org for extensive documentation on this weighted
  16. // finite-state transducer library.
  17. //
  18. // Function that implements epsilon-normalization.
  19. #ifndef FST_EPSNORMALIZE_H_
  20. #define FST_EPSNORMALIZE_H_
  21. #include <memory>
  22. #include <fst/arc-map.h>
  23. #include <fst/arc.h>
  24. #include <fst/factor-weight.h>
  25. #include <fst/fst.h>
  26. #include <fst/invert.h>
  27. #include <fst/mutable-fst.h>
  28. #include <fst/rmepsilon.h>
  29. #include <fst/string-weight.h>
  30. #include <fst/symbol-table.h>
  31. #include <fst/vector-fst.h>
  32. namespace fst {
  33. enum EpsNormalizeType { EPS_NORM_INPUT, EPS_NORM_OUTPUT };
  34. // Returns an equivalent FST that is epsilon-normalized. An acceptor is
  35. // epsilon-normalized if it is epsilon-removed. A transducer is input
  36. // epsilon-normalized if, additionally, on each path any epsilon input
  37. // label follows all non-epsilon input labels. Output epsilon-normalized
  38. // is defined similarly.
  39. //
  40. // For more information, see:
  41. //
  42. // Mohri, M. 2002. Generic epsilon-removal and input epsilon-normalization
  43. // algorithms for weighted transducers. International Journal of Foundations
  44. // of Computer Science, 13(1): 129-143, 2002.
  45. template <class Arc>
  46. void EpsNormalize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
  47. EpsNormalizeType type = EPS_NORM_INPUT) {
  48. EpsNormalize<Arc, GALLIC>(ifst, ofst, type);
  49. }
  50. // Same as above, except allows specifying explicitly the gallic weight type.
  51. template <class Arc, GallicType G>
  52. void EpsNormalize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
  53. EpsNormalizeType type) {
  54. VectorFst<GallicArc<Arc, G>> gfst;
  55. std::unique_ptr<SymbolTable> symbols;
  56. if (type == EPS_NORM_INPUT) {
  57. ArcMap(ifst, &gfst, ToGallicMapper<Arc, G>());
  58. if (ifst.OutputSymbols()) symbols.reset(ifst.OutputSymbols()->Copy());
  59. } else { // type == EPS_NORM_OUTPUT
  60. ArcMap(InvertFst<Arc>(ifst), &gfst, ToGallicMapper<Arc, G>());
  61. if (ifst.InputSymbols()) symbols.reset(ifst.InputSymbols()->Copy());
  62. }
  63. RmEpsilon(&gfst);
  64. FactorWeightFst<GallicArc<Arc, G>,
  65. GallicFactor<typename Arc::Label, typename Arc::Weight, G>>
  66. fwfst(gfst);
  67. ArcMap(fwfst, ofst, FromGallicMapper<Arc, G>());
  68. ofst->SetOutputSymbols(symbols.get());
  69. if (type == EPS_NORM_OUTPUT) Invert(ofst);
  70. }
  71. } // namespace fst
  72. #endif // FST_EPSNORMALIZE_H_