You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

229 lines
7.7 KiB

  1. // Copyright 2005-2024 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the 'License');
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an 'AS IS' BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // See www.openfst.org for extensive documentation on this weighted
  16. // finite-state transducer library.
  17. //
  18. // Class to compute the difference between two FSAs.
  19. #ifndef FST_DIFFERENCE_H_
  20. #define FST_DIFFERENCE_H_
  21. #include <memory>
  22. #include <fst/log.h>
  23. #include <fst/arc.h>
  24. #include <fst/cache.h>
  25. #include <fst/complement.h>
  26. #include <fst/compose-filter.h>
  27. #include <fst/compose.h>
  28. #include <fst/connect.h>
  29. #include <fst/float-weight.h>
  30. #include <fst/fst.h>
  31. #include <fst/impl-to-fst.h>
  32. #include <fst/matcher.h>
  33. #include <fst/mutable-fst.h>
  34. #include <fst/properties.h>
  35. #include <fst/state-table.h>
  36. #include <fst/util.h>
  37. namespace fst {
  38. template <class Arc, class M = Matcher<Fst<Arc>>,
  39. class Filter = SequenceComposeFilter<M>,
  40. class StateTable =
  41. GenericComposeStateTable<Arc, typename Filter::FilterState>>
  42. struct DifferenceFstOptions
  43. : public ComposeFstOptions<Arc, M, Filter, StateTable> {
  44. explicit DifferenceFstOptions(const CacheOptions &opts = CacheOptions(),
  45. M *matcher1 = nullptr, M *matcher2 = nullptr,
  46. Filter *filter = nullptr,
  47. StateTable *state_table = nullptr)
  48. : ComposeFstOptions<Arc, M, Filter, StateTable>(opts, matcher1, matcher2,
  49. filter, state_table) {}
  50. };
  51. // Computes the difference between two FSAs. This version is a delayed FST.
  52. // Only strings that are in the first automaton but not in second are retained
  53. // in the result.
  54. //
  55. // The first argument must be an acceptor; the second argument must be an
  56. // unweighted, epsilon-free, deterministic acceptor. One of the arguments must
  57. // be label-sorted.
  58. //
  59. // Complexity: same as ComposeFst.
  60. //
  61. // Caveats: same as ComposeFst.
  62. template <class A>
  63. class DifferenceFst : public ComposeFst<A> {
  64. public:
  65. using Arc = A;
  66. using Weight = typename Arc::Weight;
  67. using StateId = typename Arc::StateId;
  68. using ComposeFst<Arc>::CreateBase1;
  69. // A - B = A ^ B'.
  70. DifferenceFst(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
  71. const CacheOptions &opts = CacheOptions())
  72. : ComposeFst<Arc>(CreateDifferenceImplWithCacheOpts(fst1, fst2, opts)) {
  73. if (!fst1.Properties(kAcceptor, true)) {
  74. FSTERROR() << "DifferenceFst: 1st argument not an acceptor";
  75. GetImpl()->SetProperties(kError, kError);
  76. }
  77. }
  78. template <class Matcher, class Filter, class StateTable>
  79. DifferenceFst(
  80. const Fst<Arc> &fst1, const Fst<Arc> &fst2,
  81. const DifferenceFstOptions<Arc, Matcher, Filter, StateTable> &opts)
  82. : ComposeFst<Arc>(
  83. CreateDifferenceImplWithDifferenceOpts(fst1, fst2, opts)) {
  84. if (!fst1.Properties(kAcceptor, true)) {
  85. FSTERROR() << "DifferenceFst: 1st argument not an acceptor";
  86. GetImpl()->SetProperties(kError, kError);
  87. }
  88. }
  89. // See Fst<>::Copy() for doc.
  90. DifferenceFst(const DifferenceFst &fst, bool safe = false)
  91. : ComposeFst<Arc>(fst, safe) {}
  92. // Get a copy of this DifferenceFst. See Fst<>::Copy() for further doc.
  93. DifferenceFst *Copy(bool safe = false) const override {
  94. return new DifferenceFst(*this, safe);
  95. }
  96. private:
  97. using Impl = internal::ComposeFstImplBase<Arc>;
  98. using ImplToFst<Impl>::GetImpl;
  99. static std::shared_ptr<Impl> CreateDifferenceImplWithCacheOpts(
  100. const Fst<Arc> &fst1, const Fst<Arc> &fst2, const CacheOptions &opts) {
  101. using RM = RhoMatcher<Matcher<Fst<A>>>;
  102. ComplementFst<Arc> cfst(fst2);
  103. ComposeFstOptions<A, RM> copts(
  104. CacheOptions(), new RM(fst1, MATCH_NONE),
  105. new RM(cfst, MATCH_INPUT, ComplementFst<Arc>::kRhoLabel));
  106. return CreateBase1(fst1, cfst, copts);
  107. }
  108. template <class Matcher, class Filter, class StateTable>
  109. static std::shared_ptr<Impl> CreateDifferenceImplWithDifferenceOpts(
  110. const Fst<Arc> &fst1, const Fst<Arc> &fst2,
  111. const DifferenceFstOptions<Arc, Matcher, Filter, StateTable> &opts) {
  112. using RM = RhoMatcher<Matcher>;
  113. ComplementFst<Arc> cfst(fst2);
  114. ComposeFstOptions<Arc, RM> copts(opts);
  115. copts.matcher1 = new RM(fst1, MATCH_NONE, kNoLabel, MATCHER_REWRITE_ALWAYS,
  116. opts.matcher1);
  117. copts.matcher2 = new RM(cfst, MATCH_INPUT, ComplementFst<Arc>::kRhoLabel,
  118. MATCHER_REWRITE_ALWAYS, opts.matcher2);
  119. return CreateBase1(fst1, cfst, copts);
  120. }
  121. };
  122. // Specialization for DifferenceFst.
  123. template <class Arc>
  124. class StateIterator<DifferenceFst<Arc>>
  125. : public StateIterator<ComposeFst<Arc>> {
  126. public:
  127. explicit StateIterator(const DifferenceFst<Arc> &fst)
  128. : StateIterator<ComposeFst<Arc>>(fst) {}
  129. };
  130. // Specialization for DifferenceFst.
  131. template <class Arc>
  132. class ArcIterator<DifferenceFst<Arc>> : public ArcIterator<ComposeFst<Arc>> {
  133. public:
  134. using StateId = typename Arc::StateId;
  135. ArcIterator(const DifferenceFst<Arc> &fst, StateId s)
  136. : ArcIterator<ComposeFst<Arc>>(fst, s) {}
  137. };
  138. using DifferenceOptions = ComposeOptions;
  139. // Useful alias when using StdArc.
  140. using StdDifferenceFst = DifferenceFst<StdArc>;
  141. using DifferenceOptions = ComposeOptions;
  142. // Computes the difference between two FSAs. This version writes the difference
  143. // to an output MutableFst. Only strings that are in the first automaton but not
  144. // in the second are retained in the result.
  145. //
  146. // The first argument must be an acceptor; the second argument must be an
  147. // unweighted, epsilon-free, deterministic acceptor. One of the arguments must
  148. // be label-sorted.
  149. //
  150. // Complexity: same as Compose.
  151. //
  152. // Caveats: same as Compose.
  153. template <class Arc>
  154. void Difference(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
  155. MutableFst<Arc> *ofst,
  156. const DifferenceOptions &opts = DifferenceOptions()) {
  157. using M = Matcher<Fst<Arc>>;
  158. // In each case, we cache only the last state for fastest copy.
  159. switch (opts.filter_type) {
  160. case AUTO_FILTER: {
  161. CacheOptions nopts;
  162. nopts.gc_limit = 0;
  163. *ofst = DifferenceFst<Arc>(ifst1, ifst2, nopts);
  164. break;
  165. }
  166. case SEQUENCE_FILTER: {
  167. DifferenceFstOptions<Arc> dopts;
  168. dopts.gc_limit = 0;
  169. *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
  170. break;
  171. }
  172. case ALT_SEQUENCE_FILTER: {
  173. DifferenceFstOptions<Arc, M, AltSequenceComposeFilter<M>> dopts;
  174. dopts.gc_limit = 0;
  175. *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
  176. break;
  177. }
  178. case MATCH_FILTER: {
  179. DifferenceFstOptions<Arc, M, MatchComposeFilter<M>> dopts;
  180. dopts.gc_limit = 0;
  181. *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
  182. break;
  183. }
  184. case NO_MATCH_FILTER: {
  185. DifferenceFstOptions<Arc, M, NoMatchComposeFilter<M>> dopts;
  186. dopts.gc_limit = 0;
  187. *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
  188. break;
  189. }
  190. case NULL_FILTER: {
  191. DifferenceFstOptions<Arc, M, NullComposeFilter<M>> dopts;
  192. dopts.gc_limit = 0;
  193. *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
  194. break;
  195. }
  196. case TRIVIAL_FILTER: {
  197. DifferenceFstOptions<Arc, M, TrivialComposeFilter<M>> dopts;
  198. dopts.gc_limit = 0;
  199. *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
  200. break;
  201. }
  202. }
  203. if (opts.connect) Connect(ofst);
  204. }
  205. } // namespace fst
  206. #endif // FST_DIFFERENCE_H_