|
|
// fstext/kaldi-fst-io-inl.h
// Copyright 2009-2011 Microsoft Corporation
// 2012-2015 Johns Hopkins University (Author: Daniel Povey)
// 2013 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FSTEXT_KALDI_FST_IO_INL_H_
#define KALDI_FSTEXT_KALDI_FST_IO_INL_H_
#include <string>
#include <vector>
#include "util/text-utils.h"
namespace fst {
template <class Arc> void WriteFstKaldi(std::ostream& os, bool binary, const VectorFst<Arc>& t) { bool ok; if (binary) { // Binary-mode writing.
ok = t.Write(os, FstWriteOptions()); } else { // Text-mode output. Note: we expect that t.InputSymbols() and
// t.OutputSymbols() would always return NULL. The corresponding input
// routine would not work if the FST actually had symbols attached. Write a
// newline to start the FST; in a table, the first line of the FST will
// appear on its own line.
os << '\n'; bool acceptor = false, write_one = false; FstPrinter<Arc> printer(t, t.InputSymbols(), t.OutputSymbols(), NULL, acceptor, write_one, "\t"); printer.Print(&os, "<unknown>"); if (os.fail()) KALDI_ERR << "Stream failure detected writing FST to stream"; // Write another newline as a terminating character. The read routine will
// detect this [this is a Kaldi mechanism, not something in the original
// OpenFst code].
os << '\n'; ok = os.good(); } if (!ok) { KALDI_ERR << "Error writing FST to stream"; } }
// Utility function used in ReadFstKaldi
template <class W> inline bool StrToWeight(const std::string& s, bool allow_zero, W* w) { std::istringstream strm(s); strm >> *w; if (strm.fail() || (!allow_zero && *w == W::Zero())) { return false; } return true; }
template <class Arc> void ReadFstKaldi(std::istream& is, bool binary, VectorFst<Arc>* fst) { typedef typename Arc::Weight Weight; typedef typename Arc::StateId StateId; if (binary) { // We don't have access to the filename here, so write [unknown].
VectorFst<Arc>* ans = VectorFst<Arc>::Read(is, fst::FstReadOptions(std::string("[unknown]"))); if (ans == NULL) { KALDI_ERR << "Error reading FST from stream."; } *fst = *ans; // shallow copy.
delete ans; } else { // Consume the \r on Windows, the \n that the text-form FST format starts
// with, and any extra spaces that might have got in there somehow.
while (std::isspace(is.peek()) && is.peek() != '\n') is.get(); if (is.peek() == '\n') { is.get(); // consume the newline.
} else { // saw spaces but no newline.. this is not expected.
KALDI_ERR << "Reading FST: unexpected sequence of spaces " << " at file position " << is.tellg(); } using kaldi::ConvertStringToInteger; using kaldi::SplitStringToIntegers; using std::string; using std::vector; fst->DeleteStates(); string line; size_t nline = 0; string separator = FLAGS_fst_field_separator + "\r\n"; while (std::getline(is, line)) { nline++; vector<string> col; // on Windows we'll write in text and read in binary mode.
kaldi::SplitStringToVector(line, separator.c_str(), true, &col); if (col.size() == 0) break; // Empty line is a signal to stop, in our
// archive format.
if (col.size() > 5) { KALDI_ERR << "Bad line in FST: " << line; } StateId s; if (!ConvertStringToInteger(col[0], &s)) { KALDI_ERR << "Bad line in FST: " << line; } while (s >= fst->NumStates()) fst->AddState(); if (nline == 1) fst->SetStart(s);
bool ok = true; Arc arc; Weight w; StateId d = s; switch (col.size()) { case 1: fst->SetFinal(s, Weight::One()); break; case 2: if (!StrToWeight(col[1], true, &w)) ok = false; else fst->SetFinal(s, w); break; case 3: // 3 columns not ok for Lattice format; it's not an acceptor.
ok = false; break; case 4: ok = ConvertStringToInteger(col[1], &arc.nextstate) && ConvertStringToInteger(col[2], &arc.ilabel) && ConvertStringToInteger(col[3], &arc.olabel); if (ok) { d = arc.nextstate; arc.weight = Weight::One(); fst->AddArc(s, arc); } break; case 5: ok = ConvertStringToInteger(col[1], &arc.nextstate) && ConvertStringToInteger(col[2], &arc.ilabel) && ConvertStringToInteger(col[3], &arc.olabel) && StrToWeight(col[4], false, &arc.weight); if (ok) { d = arc.nextstate; fst->AddArc(s, arc); } break; default: ok = false; } while (d >= fst->NumStates()) fst->AddState(); if (!ok) KALDI_ERR << "Bad line in FST: " << line; } } }
template <class Arc> // static
bool VectorFstTplHolder<Arc>::Write(std::ostream& os, bool binary, const T& t) { try { WriteFstKaldi(os, binary, t); return true; } catch (...) { return false; } }
template <class Arc> // static
bool VectorFstTplHolder<Arc>::Read(std::istream& is) { Clear(); int c = is.peek(); if (c == -1) { KALDI_WARN << "End of stream detected reading Fst"; return false; } else if (isspace(c)) { // The text form of the FST begins
// with space (normally, '\n'), so this means it's text (the binary form
// cannot begin with space because it starts with the FST Type() which is
// not space).
try { t_ = new VectorFst<Arc>(); ReadFstKaldi(is, false, t_); } catch (...) { Clear(); return false; } } else { // reading a binary FST.
try { t_ = new VectorFst<Arc>(); ReadFstKaldi(is, true, t_); } catch (...) { Clear(); return false; } } return true; }
} // namespace fst.
#endif // KALDI_FSTEXT_KALDI_FST_IO_INL_H_
|