You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

241 lines
9.0 KiB

  1. // Copyright 2005-2024 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the 'License');
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an 'AS IS' BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // See www.openfst.org for extensive documentation on this weighted
  16. // finite-state transducer library.
  17. //
  18. // Functions to manipulate and test property bits.
  19. #ifndef FST_TEST_PROPERTIES_H_
  20. #define FST_TEST_PROPERTIES_H_
  21. #include <cstdint>
  22. #include <optional>
  23. #include <vector>
  24. #include <fst/flags.h>
  25. #include <fst/log.h>
  26. #include <fst/cc-visitors.h>
  27. #include <fst/dfs-visit.h>
  28. #include <fst/fst.h>
  29. #include <fst/properties.h>
  30. #include <fst/util.h>
  31. #include <unordered_set>
  32. DECLARE_bool(fst_verify_properties);
  33. namespace fst {
  34. namespace internal {
  35. // Computes FST property values defined in properties.h. The value of each
  36. // property indicated in the mask will be determined and returned (these will
  37. // never be unknown here). In the course of determining the properties
  38. // specifically requested in the mask, certain other properties may be
  39. // determined (those with little additional expense) and their values will be
  40. // returned as well. The complete set of known properties (whether true or
  41. // false) determined by this operation will be assigned to the value pointed
  42. // to by KNOWN. 'mask & required_mask' is used to determine whether the stored
  43. // properties can be used. This routine is seldom called directly; instead it is
  44. // used to implement fst.Properties(mask, /*test=*/true).
  45. template <class Arc>
  46. uint64_t ComputeProperties(const Fst<Arc> &fst, uint64_t mask,
  47. uint64_t *known) {
  48. using Label = typename Arc::Label;
  49. using StateId = typename Arc::StateId;
  50. using Weight = typename Arc::Weight;
  51. const auto fst_props = fst.Properties(kFstProperties, false); // FST-stored.
  52. // Computes (trinary) properties explicitly.
  53. // Initialize with binary properties (already known).
  54. uint64_t comp_props = fst_props & kBinaryProperties;
  55. // Computes these trinary properties with a DFS. We compute only those that
  56. // need a DFS here, since we otherwise would like to avoid a DFS since its
  57. // stack could grow large.
  58. constexpr uint64_t kDfsProps =
  59. kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic | kAccessible |
  60. kNotAccessible | kCoAccessible | kNotCoAccessible;
  61. std::vector<StateId> scc;
  62. if (mask & (kDfsProps | kWeightedCycles | kUnweightedCycles)) {
  63. SccVisitor<Arc> scc_visitor(&scc, nullptr, nullptr, &comp_props);
  64. DfsVisit(fst, &scc_visitor);
  65. }
  66. // Computes any remaining trinary properties via a state and arcs iterations
  67. if (mask & ~(kBinaryProperties | kDfsProps)) {
  68. comp_props |= kAcceptor | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
  69. kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted |
  70. kString;
  71. if (mask & (kIDeterministic | kNonIDeterministic)) {
  72. comp_props |= kIDeterministic;
  73. }
  74. if (mask & (kODeterministic | kNonODeterministic)) {
  75. comp_props |= kODeterministic;
  76. }
  77. if (mask & (kDfsProps | kWeightedCycles | kUnweightedCycles)) {
  78. comp_props |= kUnweightedCycles;
  79. }
  80. std::optional<std::unordered_set<Label>> ilabels;
  81. std::optional<std::unordered_set<Label>> olabels;
  82. StateId nfinal = 0;
  83. for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
  84. StateId s = siter.Value();
  85. Arc prev_arc;
  86. // Creates these only if we need to.
  87. if (mask & (kIDeterministic | kNonIDeterministic)) {
  88. ilabels.emplace();
  89. }
  90. if (mask & (kODeterministic | kNonODeterministic)) {
  91. olabels.emplace();
  92. }
  93. bool first_arc = true;
  94. for (ArcIterator<Fst<Arc>> aiter(fst, s); !aiter.Done(); aiter.Next()) {
  95. const auto &arc = aiter.Value();
  96. if (ilabels && ilabels->find(arc.ilabel) != ilabels->end()) {
  97. comp_props |= kNonIDeterministic;
  98. comp_props &= ~kIDeterministic;
  99. }
  100. if (olabels && olabels->find(arc.olabel) != olabels->end()) {
  101. comp_props |= kNonODeterministic;
  102. comp_props &= ~kODeterministic;
  103. }
  104. if (arc.ilabel != arc.olabel) {
  105. comp_props |= kNotAcceptor;
  106. comp_props &= ~kAcceptor;
  107. }
  108. if (arc.ilabel == 0 && arc.olabel == 0) {
  109. comp_props |= kEpsilons;
  110. comp_props &= ~kNoEpsilons;
  111. }
  112. if (arc.ilabel == 0) {
  113. comp_props |= kIEpsilons;
  114. comp_props &= ~kNoIEpsilons;
  115. }
  116. if (arc.olabel == 0) {
  117. comp_props |= kOEpsilons;
  118. comp_props &= ~kNoOEpsilons;
  119. }
  120. if (!first_arc) {
  121. if (arc.ilabel < prev_arc.ilabel) {
  122. comp_props |= kNotILabelSorted;
  123. comp_props &= ~kILabelSorted;
  124. }
  125. if (arc.olabel < prev_arc.olabel) {
  126. comp_props |= kNotOLabelSorted;
  127. comp_props &= ~kOLabelSorted;
  128. }
  129. }
  130. if (arc.weight != Weight::One() && arc.weight != Weight::Zero()) {
  131. comp_props |= kWeighted;
  132. comp_props &= ~kUnweighted;
  133. if ((comp_props & kUnweightedCycles) &&
  134. scc[s] == scc[arc.nextstate]) {
  135. comp_props |= kWeightedCycles;
  136. comp_props &= ~kUnweightedCycles;
  137. }
  138. }
  139. if (arc.nextstate <= s) {
  140. comp_props |= kNotTopSorted;
  141. comp_props &= ~kTopSorted;
  142. }
  143. if (arc.nextstate != s + 1) {
  144. comp_props |= kNotString;
  145. comp_props &= ~kString;
  146. }
  147. prev_arc = arc;
  148. first_arc = false;
  149. if (ilabels) ilabels->insert(arc.ilabel);
  150. if (olabels) olabels->insert(arc.olabel);
  151. }
  152. if (nfinal > 0) { // Final state not last.
  153. comp_props |= kNotString;
  154. comp_props &= ~kString;
  155. }
  156. const auto final_weight = fst.Final(s);
  157. if (final_weight != Weight::Zero()) { // Final state.
  158. if (final_weight != Weight::One()) {
  159. comp_props |= kWeighted;
  160. comp_props &= ~kUnweighted;
  161. }
  162. ++nfinal;
  163. } else { // Non-final state.
  164. if (fst.NumArcs(s) != 1) {
  165. comp_props |= kNotString;
  166. comp_props &= ~kString;
  167. }
  168. }
  169. }
  170. if (fst.Start() != kNoStateId && fst.Start() != 0) {
  171. comp_props |= kNotString;
  172. comp_props &= ~kString;
  173. }
  174. }
  175. if (known) *known = KnownProperties(comp_props);
  176. return comp_props;
  177. }
  178. // Similar to ComputeProperties, but uses the properties already stored
  179. // in the FST when possible.
  180. template <class Arc>
  181. uint64_t ComputeOrUseStoredProperties(const Fst<Arc> &fst, uint64_t mask,
  182. uint64_t *known) {
  183. // Check stored FST properties first.
  184. const auto fst_props = fst.Properties(kFstProperties, /*test=*/false);
  185. const auto known_props = KnownProperties(fst_props);
  186. // If FST contains required info, return it.
  187. if ((known_props & mask) == mask) {
  188. if (known) *known = known_props;
  189. return fst_props;
  190. }
  191. return ComputeProperties(fst, mask, known);
  192. }
  193. // This is a wrapper around ComputeProperties that will cause a fatal error if
  194. // the stored properties and the computed properties are incompatible when
  195. // FST_FLAGS_fst_verify_properties is true. This routine is seldom called directly;
  196. // instead it is used to implement fst.Properties(mask, /*test=*/true).
  197. template <class Arc>
  198. uint64_t TestProperties(const Fst<Arc> &fst, uint64_t mask, uint64_t *known) {
  199. if (FST_FLAGS_fst_verify_properties) {
  200. const auto stored_props = fst.Properties(kFstProperties, false);
  201. const auto computed_props = ComputeProperties(fst, mask, known);
  202. if (!CompatProperties(stored_props, computed_props)) {
  203. FSTERROR() << "TestProperties: stored FST properties incorrect"
  204. << " (stored: props1, computed: props2)";
  205. }
  206. return computed_props;
  207. } else {
  208. return ComputeOrUseStoredProperties(fst, mask, known);
  209. }
  210. }
  211. // If all the properties of 'fst' corresponding to 'check_mask' are known,
  212. // returns the stored properties. Otherwise, the properties corresponding to
  213. // both 'check_mask' and 'test_mask' are computed. This is used to check for
  214. // newly-added properties that might not be set in old binary files.
  215. template <class Arc>
  216. uint64_t CheckProperties(const Fst<Arc> &fst, uint64_t check_mask,
  217. uint64_t test_mask) {
  218. auto props = fst.Properties(kFstProperties, false);
  219. if (FST_FLAGS_fst_verify_properties) {
  220. props = TestProperties(fst, check_mask | test_mask, /*known=*/nullptr);
  221. } else if ((KnownProperties(props) & check_mask) != check_mask) {
  222. props = ComputeProperties(fst, check_mask | test_mask, /*known=*/nullptr);
  223. }
  224. return props & (check_mask | test_mask);
  225. }
  226. } // namespace internal
  227. } // namespace fst
  228. #endif // FST_TEST_PROPERTIES_H_