// Copyright 2005-2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the 'License'); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an 'AS IS' BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // Utilities to convert strings into FSTs. #ifndef FST_STRING_H_ #define FST_STRING_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include DECLARE_string(fst_field_separator); namespace fst { enum class TokenType : uint8_t { SYMBOL = 1, BYTE = 2, UTF8 = 3 }; inline std::ostream &operator<<(std::ostream &strm, const TokenType &token_type) { switch (token_type) { case TokenType::BYTE: return strm << "byte"; case TokenType::UTF8: return strm << "utf8"; case TokenType::SYMBOL: return strm << "symbol"; } return strm; // unreachable } namespace internal { template bool ConvertSymbolToLabel(std::string_view str, const SymbolTable *syms, Label unknown_label, Label *output) { int64_t n; if (syms) { n = syms->Find(str); if ((n == kNoSymbol) && (unknown_label != kNoLabel)) n = unknown_label; if (n == kNoSymbol) { LOG(ERROR) << "ConvertSymbolToLabel: Symbol \"" << str << "\" is not mapped to any integer label, symbol table = " << syms->Name(); return false; } } else { const auto maybe_n = ParseInt64(str); if (!maybe_n.has_value()) { LOG(ERROR) << "ConvertSymbolToLabel: Bad label integer " << "= \"" << str << "\""; return false; } n = *maybe_n; } *output = n; return true; } template bool ConvertStringToLabels( std::string_view str, TokenType token_type, const SymbolTable *syms, Label unknown_label, std::vector