You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

898 lines
28 KiB

  1. // util/kaldi-io.cc
  2. // Copyright 2009-2011 Microsoft Corporation; Jan Silovsky
  3. // 2016 Xiaohui Zhang
  4. // See ../../COPYING for clarification regarding multiple authors
  5. //
  6. // Licensed under the Apache License, Version 2.0 (the "License");
  7. // you may not use this file except in compliance with the License.
  8. // You may obtain a copy of the License at
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  11. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  12. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  13. // MERCHANTABLITY OR NON-INFRINGEMENT.
  14. // See the Apache 2 License for the specific language governing permissions and
  15. // limitations under the License.
  16. #include "util/kaldi-io.h"
  17. #include <errno.h>
  18. #include <stdio.h>
  19. #include <stdlib.h>
  20. #include <cstdlib>
  21. #include "base/io-funcs.h"
  22. #include "base/kaldi-math.h"
  23. #include "util/kaldi-pipebuf.h"
  24. #include "util/parse-options.h"
  25. #include "util/text-utils.h"
  26. #ifdef KALDI_CYGWIN_COMPAT
  27. #include "util/kaldi-cygwin-io-inl.h"
  28. #define MapOsPath(x) MapCygwinPath(x)
  29. #else // KALDI_CYGWIN_COMPAT
  30. #define MapOsPath(x) x
  31. #endif // KALDI_CYGWIN_COMPAT
  32. #if defined(_MSC_VER)
  33. static FILE* popen(const char* command, const char* mode) {
  34. #ifdef KALDI_CYGWIN_COMPAT
  35. return kaldi::CygwinCompatPopen(command, mode);
  36. #else // KALDI_CYGWIN_COMPAT
  37. return _popen(command, mode);
  38. #endif // KALDI_CYGWIN_COMPAT
  39. }
  40. #endif // _MSC_VER
  namespace kaldi {
  #ifndef _MSC_VER  // on VS, we don't need this type.
  // Stream-buffer type that the pipe implementations later in this file
  // construct around the FILE* returned by popen().
  // could replace basic_pipebuf<char> with stdio_filebuf<char> on some platforms.
  // Would mean we could use less of our own code.
  typedef basic_pipebuf<char> PipebufType;
  #endif
  }  // namespace kaldi
  48. namespace kaldi {
  49. std::string PrintableRxfilename(const std::string& rxfilename) {
  50. if (rxfilename == "" || rxfilename == "-") {
  51. return "standard input";
  52. } else {
  53. // If this call to Escape later causes compilation issues,
  54. // just replace it with "return rxfilename"; it's only a
  55. // pretty-printing issue.
  56. return ParseOptions::Escape(rxfilename);
  57. }
  58. }
  59. std::string PrintableWxfilename(const std::string& wxfilename) {
  60. if (wxfilename == "" || wxfilename == "-") {
  61. return "standard output";
  62. } else {
  63. // If this call to Escape later causes compilation issues,
  64. // just replace it with "return wxfilename"; it's only a
  65. // pretty-printing issue.
  66. return ParseOptions::Escape(wxfilename);
  67. }
  68. }
  69. OutputType ClassifyWxfilename(const std::string& filename) {
  70. const char* c = filename.c_str();
  71. size_t length = filename.length();
  72. char first_char = c[0],
  73. last_char = (length == 0 ? '\0' : c[filename.length() - 1]);
  74. // if 'filename' is "" or "-", return kStandardOutput.
  75. if (length == 0 || (length == 1 && first_char == '-')) {
  76. return kStandardOutput;
  77. } else if (first_char == '|') {
  78. return kPipeOutput; // An output pipe like "|blah".
  79. } else if (isspace(first_char) || isspace(last_char) || last_char == '|') {
  80. return kNoOutput; // Leading or trailing space: can't interpret this.
  81. // Final '|' would represent an input pipe, not an
  82. // output pipe.
  83. // } else if ((first_char == 'a' || first_char == 's') &&
  84. // strchr(c, ':') != NULL &&
  85. // (ClassifyWspecifier(filename, NULL, NULL, NULL) !=
  86. // kNoWspecifier ||
  87. // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) {
  88. // // e.g. ark:something or scp:something... this is almost certainly a
  89. // // scripting error, so call it an error rather than treating it as a
  90. // file.
  91. // // In practice in modern kaldi scripts all (r,w)filenames begin with
  92. // "ark"
  93. // // or "scp", even though technically speaking options like "b", "t",
  94. // "s" or
  95. // // "cs" can appear before the ark or scp, like "b,ark". For
  96. // efficiency,
  97. // // and because this code is really just a nicety to catch errors
  98. // earlier
  99. // // than they would otherwise be caught, we only call those extra
  100. // functions
  101. // // for filenames beginning with 'a' or 's'.
  102. // return kNoOutput;
  103. } else if (isdigit(last_char)) {
  104. // This could be a file, but we have to see if it's an offset into a file
  105. // (like foo.ark:4314328), which is not allowed for writing (but is
  106. // allowed for reaching). This eliminates some things which would be
  107. // valid UNIX filenames but are not allowed by Kaldi. (Even if we allowed
  108. // such filenames for writing, we woudln't be able to correctly read them).
  109. const char* d = c + length - 1;
  110. while (isdigit(*d) && d > c) d--;
  111. if (*d == ':') return kNoOutput;
  112. // else it could still be a filename; continue to the next check.
  113. }
  114. // At this point it matched no other pattern so we assume a filename, but we
  115. // check for internal '|' as it's a common source of errors to have pipe
  116. // commands without the pipe in the right place. Say that it can't be
  117. // classified.
  118. if (strchr(c, '|') != NULL) {
  119. KALDI_WARN << "Trying to classify wxfilename with pipe symbol in the"
  120. " wrong place (pipe without | at the beginning?): "
  121. << filename;
  122. return kNoOutput;
  123. }
  124. return kFileOutput; // It matched no other pattern: assume it's a filename.
  125. }
  126. InputType ClassifyRxfilename(const std::string& filename) {
  127. const char* c = filename.c_str();
  128. size_t length = filename.length();
  129. char first_char = c[0],
  130. last_char = (length == 0 ? '\0' : c[filename.length() - 1]);
  131. // if 'filename' is "" or "-", return kStandardInput.
  132. if (length == 0 || (length == 1 && first_char == '-')) {
  133. return kStandardInput;
  134. } else if (first_char == '|') {
  135. return kNoInput; // An output pipe like "|blah": not
  136. // valid for input.
  137. } else if (last_char == '|') {
  138. return kPipeInput;
  139. } else if (isspace(first_char) || isspace(last_char)) {
  140. return kNoInput; // We don't allow leading or trailing space in a filename.
  141. // } else if ((first_char == 'a' || first_char == 's') &&
  142. // strchr(c, ':') != NULL &&
  143. // (ClassifyWspecifier(filename, NULL, NULL, NULL) !=
  144. // kNoWspecifier ||
  145. // ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) {
  146. // // e.g. ark:something or scp:something... this is almost certainly a
  147. // // scripting error, so call it an error rather than treating it as a
  148. // file.
  149. // // In practice in modern kaldi scripts all (r,w)filenames begin with
  150. // "ark"
  151. // // or "scp", even though technically speaking options like "b", "t",
  152. // "s" or
  153. // // "cs" can appear before the ark or scp, like "b,ark". For
  154. // efficiency,
  155. // // and because this code is really just a nicety to catch errors
  156. // earlier
  157. // // than they would otherwise be caught, we only call those extra
  158. // functions
  159. // // for filenames beginning with 'a' or 's'.
  160. // return kNoInput;
  161. } else if (isdigit(last_char)) {
  162. const char* d = c + length - 1;
  163. while (isdigit(*d) && d > c) d--;
  164. if (*d == ':')
  165. return kOffsetFileInput; // Filename is like
  166. // some_file:12345
  167. // otherwise it could still be a filename; continue to the next check.
  168. }
  169. // At this point it matched no other pattern so we assume a filename, but
  170. // we check for '|' as it's a common source of errors to have pipe
  171. // commands without the pipe in the right place. Say that it can't be
  172. // classified in this case.
  173. if (strchr(c, '|') != NULL) {
  174. KALDI_WARN << "Trying to classify rxfilename with pipe symbol in the"
  175. " wrong place (pipe without | at the end?): "
  176. << filename;
  177. return kNoInput;
  178. }
  179. return kFileInput; // It matched no other pattern: assume it's a filename.
  180. }
  // Abstract interface shared by the concrete output implementations in this
  // file (file, standard output, pipe).
  class OutputImplBase {
   public:
    virtual ~OutputImplBase() {}

    // Opens 'filename' for writing (no header is written) and returns true on
    // success.  It cannot be called on an already open stream.
    virtual bool Open(const std::string& filename, bool binary) = 0;

    // Returns the stream that callers write to.
    virtual std::ostream& Stream() = 0;

    // Closes the stream; the bool result reports whether that succeeded.
    virtual bool Close() = 0;
  };
  190. class FileOutputImpl : public OutputImplBase {
  191. public:
  192. virtual bool Open(const std::string& filename, bool binary) {
  193. if (os_.is_open())
  194. KALDI_ERR << "FileOutputImpl::Open(), "
  195. << "open called on already open file.";
  196. filename_ = filename;
  197. os_.open(MapOsPath(filename_).c_str(),
  198. binary ? std::ios_base::out | std::ios_base::binary
  199. : std::ios_base::out);
  200. return os_.is_open();
  201. }
  202. virtual std::ostream& Stream() {
  203. if (!os_.is_open())
  204. KALDI_ERR << "FileOutputImpl::Stream(), file is not open.";
  205. // I believe this error can only arise from coding error.
  206. return os_;
  207. }
  208. virtual bool Close() {
  209. if (!os_.is_open())
  210. KALDI_ERR << "FileOutputImpl::Close(), file is not open.";
  211. // I believe this error can only arise from coding error.
  212. os_.close();
  213. return !(os_.fail());
  214. }
  215. virtual ~FileOutputImpl() {
  216. if (os_.is_open()) {
  217. os_.close();
  218. if (os_.fail()) KALDI_ERR << "Error closing output file " << filename_;
  219. }
  220. }
  221. private:
  222. std::string filename_;
  223. std::ofstream os_;
  224. };
  225. class StandardOutputImpl : public OutputImplBase {
  226. public:
  227. StandardOutputImpl() : is_open_(false) {}
  228. virtual bool Open(const std::string& filename, bool binary) {
  229. if (is_open_)
  230. KALDI_ERR << "StandardOutputImpl::Open(), "
  231. "open called on already open file.";
  232. #ifdef _MSC_VER
  233. _setmode(_fileno(stdout), binary ? _O_BINARY : _O_TEXT);
  234. #endif
  235. is_open_ = std::cout.good();
  236. return is_open_;
  237. }
  238. virtual std::ostream& Stream() {
  239. if (!is_open_)
  240. KALDI_ERR << "StandardOutputImpl::Stream(), object not initialized.";
  241. // I believe this error can only arise from coding error.
  242. return std::cout;
  243. }
  244. virtual bool Close() {
  245. if (!is_open_)
  246. KALDI_ERR << "StandardOutputImpl::Close(), file is not open.";
  247. is_open_ = false;
  248. std::cout << std::flush;
  249. return !(std::cout.fail());
  250. }
  251. virtual ~StandardOutputImpl() {
  252. if (is_open_) {
  253. std::cout << std::flush;
  254. if (std::cout.fail()) KALDI_ERR << "Error writing to standard output";
  255. }
  256. }
  257. private:
  258. bool is_open_;
  259. };
  260. class PipeOutputImpl : public OutputImplBase {
  261. public:
  262. PipeOutputImpl() : f_(NULL), os_(NULL) {}
  263. virtual bool Open(const std::string& wxfilename, bool binary) {
  264. filename_ = wxfilename;
  265. KALDI_ASSERT(f_ == NULL); // Make sure closed.
  266. KALDI_ASSERT(wxfilename.length() != 0 && wxfilename[0] == '|'); // should
  267. // start with '|'
  268. std::string cmd_name(wxfilename, 1);
  269. #if defined(_MSC_VER) || defined(__CYGWIN__)
  270. f_ = popen(cmd_name.c_str(), (binary ? "wb" : "w"));
  271. #else
  272. f_ = popen(cmd_name.c_str(), "w");
  273. #endif
  274. if (!f_) { // Failure.
  275. KALDI_WARN << "Failed opening pipe for writing, command is: " << cmd_name
  276. << ", errno is " << strerror(errno);
  277. return false;
  278. } else {
  279. #ifndef _MSC_VER
  280. fb_ = new PipebufType(f_, // Using this constructor won't make the
  281. // destructor try to close the stream when
  282. // we're done.
  283. (binary ? std::ios_base::out | std::ios_base::binary
  284. : std::ios_base::out));
  285. KALDI_ASSERT(fb_ != NULL); // or would be alloc error.
  286. os_ = new std::ostream(fb_);
  287. #else
  288. os_ = new std::ofstream(f_);
  289. #endif
  290. return os_->good();
  291. }
  292. }
  293. virtual std::ostream& Stream() {
  294. if (os_ == NULL)
  295. KALDI_ERR << "PipeOutputImpl::Stream(),"
  296. " object not initialized.";
  297. // I believe this error can only arise from coding error.
  298. return *os_;
  299. }
  300. virtual bool Close() {
  301. if (os_ == NULL) KALDI_ERR << "PipeOutputImpl::Close(), file is not open.";
  302. bool ok = true;
  303. os_->flush();
  304. if (os_->fail()) ok = false;
  305. delete os_;
  306. os_ = NULL;
  307. int status;
  308. #ifdef _MSC_VER
  309. status = _pclose(f_);
  310. #else
  311. status = pclose(f_);
  312. #endif
  313. if (status)
  314. KALDI_WARN << "Pipe " << filename_ << " had nonzero return status "
  315. << status;
  316. f_ = NULL;
  317. #ifndef _MSC_VER
  318. delete fb_;
  319. fb_ = NULL;
  320. #endif
  321. return ok;
  322. }
  323. virtual ~PipeOutputImpl() {
  324. if (os_) {
  325. if (!Close())
  326. KALDI_ERR << "Error writing to pipe " << PrintableWxfilename(filename_);
  327. }
  328. }
  329. private:
  330. std::string filename_;
  331. FILE* f_;
  332. #ifndef _MSC_VER
  333. PipebufType* fb_;
  334. #endif
  335. std::ostream* os_;
  336. };
  337. class InputImplBase {
  338. public:
  339. // Open will open it as a file, and return true on success.
  340. // May be called twice only for kOffsetFileInput (otherwise,
  341. // if called twice, we just create a new Input object, to avoid
  342. // having to deal with the extra hassle of reopening with the
  343. // same object.
  344. // Note that we will to call Open with true (binary) for
  345. // for text-mode Kaldi files; the only actual text-mode input
  346. // is for non-Kaldi files.
  347. virtual bool Open(const std::string& filename, bool binary) = 0;
  348. virtual std::istream& Stream() = 0;
  349. virtual int32 Close() = 0; // We only need to check failure in the case of
  350. // kPipeInput.
  351. // on close for input streams.
  352. virtual InputType MyType() = 0; // Because if it's kOffsetFileInput, we may
  353. // call Open twice
  354. // (has efficiency benefits).
  355. virtual ~InputImplBase() {}
  356. };
  357. class FileInputImpl : public InputImplBase {
  358. public:
  359. virtual bool Open(const std::string& filename, bool binary) {
  360. if (is_.is_open())
  361. KALDI_ERR << "FileInputImpl::Open(), "
  362. << "open called on already open file.";
  363. is_.open(
  364. MapOsPath(filename).c_str(),
  365. binary ? std::ios_base::in | std::ios_base::binary : std::ios_base::in);
  366. return is_.is_open();
  367. }
  368. virtual std::istream& Stream() {
  369. if (!is_.is_open())
  370. KALDI_ERR << "FileInputImpl::Stream(), file is not open.";
  371. // I believe this error can only arise from coding error.
  372. return is_;
  373. }
  374. virtual int32 Close() {
  375. if (!is_.is_open())
  376. KALDI_ERR << "FileInputImpl::Close(), file is not open.";
  377. // I believe this error can only arise from coding error.
  378. is_.close();
  379. // Don't check status.
  380. return 0;
  381. }
  382. virtual InputType MyType() { return kFileInput; }
  383. virtual ~FileInputImpl() {
  384. // Stream will automatically be closed, and we don't care about
  385. // whether it fails.
  386. }
  387. private:
  388. std::ifstream is_;
  389. };
  390. class StandardInputImpl : public InputImplBase {
  391. public:
  392. StandardInputImpl() : is_open_(false) {}
  393. virtual bool Open(const std::string& filename, bool binary) {
  394. if (is_open_)
  395. KALDI_ERR << "StandardInputImpl::Open(), "
  396. "open called on already open file.";
  397. is_open_ = true;
  398. #ifdef _MSC_VER
  399. _setmode(_fileno(stdin), binary ? _O_BINARY : _O_TEXT);
  400. #endif
  401. return true; // Don't check good() because would be false if
  402. // eof, which may be valid input.
  403. }
  404. virtual std::istream& Stream() {
  405. if (!is_open_)
  406. KALDI_ERR << "StandardInputImpl::Stream(), object not initialized.";
  407. // I believe this error can only arise from coding error.
  408. return std::cin;
  409. }
  410. virtual InputType MyType() { return kStandardInput; }
  411. virtual int32 Close() {
  412. if (!is_open_) KALDI_ERR << "StandardInputImpl::Close(), file is not open.";
  413. is_open_ = false;
  414. return 0;
  415. }
  416. virtual ~StandardInputImpl() {}
  417. private:
  418. bool is_open_;
  419. };
  420. class PipeInputImpl : public InputImplBase {
  421. public:
  422. PipeInputImpl() : f_(NULL), is_(NULL) {}
  423. virtual bool Open(const std::string& rxfilename, bool binary) {
  424. filename_ = rxfilename;
  425. KALDI_ASSERT(f_ == NULL); // Make sure closed.
  426. KALDI_ASSERT(rxfilename.length() != 0 &&
  427. rxfilename[rxfilename.length() - 1] ==
  428. '|'); // should end with '|'
  429. std::string cmd_name(rxfilename, 0, rxfilename.length() - 1);
  430. #if defined(_MSC_VER) || defined(__CYGWIN__)
  431. f_ = popen(cmd_name.c_str(), (binary ? "rb" : "r"));
  432. #else
  433. f_ = popen(cmd_name.c_str(), "r");
  434. #endif
  435. if (!f_) { // Failure.
  436. KALDI_WARN << "Failed opening pipe for reading, command is: " << cmd_name
  437. << ", errno is " << strerror(errno);
  438. return false;
  439. } else {
  440. #ifndef _MSC_VER
  441. fb_ = new PipebufType(f_, // Using this constructor won't lead the
  442. // destructor to close the stream.
  443. (binary ? std::ios_base::in | std::ios_base::binary
  444. : std::ios_base::in));
  445. KALDI_ASSERT(fb_ != NULL); // or would be alloc error.
  446. is_ = new std::istream(fb_);
  447. #else
  448. is_ = new std::ifstream(f_);
  449. #endif
  450. if (is_->fail() || is_->bad()) return false;
  451. if (is_->eof()) {
  452. KALDI_WARN << "Pipe opened with command "
  453. << PrintableRxfilename(rxfilename) << " is empty.";
  454. // don't return false: empty may be valid.
  455. }
  456. return true;
  457. }
  458. }
  459. virtual std::istream& Stream() {
  460. if (is_ == NULL)
  461. KALDI_ERR << "PipeInputImpl::Stream(), object not initialized.";
  462. // I believe this error can only arise from coding error.
  463. return *is_;
  464. }
  465. virtual int32 Close() {
  466. if (is_ == NULL) KALDI_ERR << "PipeInputImpl::Close(), file is not open.";
  467. delete is_;
  468. is_ = NULL;
  469. int32 status;
  470. #ifdef _MSC_VER
  471. status = _pclose(f_);
  472. #else
  473. status = pclose(f_);
  474. #endif
  475. if (status)
  476. KALDI_WARN << "Pipe " << filename_ << " had nonzero return status "
  477. << status;
  478. f_ = NULL;
  479. #ifndef _MSC_VER
  480. delete fb_;
  481. fb_ = NULL;
  482. #endif
  483. return status;
  484. }
  485. virtual ~PipeInputImpl() {
  486. if (is_) Close();
  487. }
  488. virtual InputType MyType() { return kPipeInput; }
  489. private:
  490. std::string filename_;
  491. FILE* f_;
  492. #ifndef _MSC_VER
  493. PipebufType* fb_;
  494. #endif
  495. std::istream* is_;
  496. };
  497. /*
  498. #else
  499. // Just have an empty implementation of the pipe input that crashes if
  500. // called.
  501. class PipeInputImpl: public InputImplBase {
  502. public:
  503. PipeInputImpl() { KALDI_ASSERT(0 && "Pipe input not yet supported on this
  504. platform."); }
  505. virtual bool Open(const std::string, bool) { return 0; }
  506. virtual std::istream &Stream() const { return NULL; }
  507. virtual void Close() {}
  508. virtual InputType MyType() { return kPipeInput; }
  509. };
  510. #endif
  511. */
  512. class OffsetFileInputImpl : public InputImplBase {
  513. // This class is a bit more complicated than the
  514. public:
  515. // splits a filename like /my/file:123 into /my/file and the
  516. // number 123. Crashes if not this format.
  517. static void SplitFilename(const std::string& rxfilename,
  518. std::string* filename, size_t* offset) {
  519. size_t pos = rxfilename.find_last_of(':');
  520. KALDI_ASSERT(pos != std::string::npos); // would indicate error in calling
  521. // code, as the filename is supposed to be of the correct form at this
  522. // point.
  523. *filename = std::string(rxfilename, 0, pos);
  524. std::string number(rxfilename, pos + 1);
  525. bool ans = ConvertStringToInteger(number, offset);
  526. if (!ans)
  527. KALDI_ERR << "Cannot get offset from filename " << rxfilename
  528. << " (possibly you compiled in 32-bit and have a >32-bit"
  529. << " byte offset into a file; you'll have to compile 64-bit.";
  530. }
  531. bool Seek(size_t offset) {
  532. size_t cur_pos = is_.tellg();
  533. if (cur_pos == offset) {
  534. return true;
  535. } else if (cur_pos < offset && cur_pos + 100 > offset) {
  536. // We're close enough that it may be faster to just
  537. // read that data, rather than seek.
  538. for (size_t i = cur_pos; i < offset; i++) is_.get();
  539. return (is_.tellg() == std::streampos(offset));
  540. }
  541. // Try to actually seek.
  542. is_.seekg(offset, std::ios_base::beg);
  543. if (is_.fail()) { // failbit or badbit is set [error happened]
  544. is_.close();
  545. return false; // failure.
  546. } else {
  547. is_.clear(); // Clear any failure bits (e.g. eof).
  548. return true; // success.
  549. }
  550. }
  551. // This Open routine is unusual in that it is designed to work even
  552. // if it was already open. This for efficiency when seeking multiple
  553. // times.
  554. virtual bool Open(const std::string& rxfilename, bool binary) {
  555. if (is_.is_open()) {
  556. // We are opening when we have an already-open file.
  557. // We may have to seek within this file, or else close it and
  558. // open a different one.
  559. std::string tmp_filename;
  560. size_t offset;
  561. SplitFilename(rxfilename, &tmp_filename, &offset);
  562. if (tmp_filename == filename_ && binary == binary_) { // Just seek
  563. is_.clear(); // clear fail bit, etc.
  564. return Seek(offset);
  565. } else {
  566. is_.close(); // don't bother checking error status of is_.
  567. filename_ = tmp_filename;
  568. is_.open(MapOsPath(filename_).c_str(),
  569. binary ? std::ios_base::in | std::ios_base::binary
  570. : std::ios_base::in);
  571. if (!is_.is_open())
  572. return false;
  573. else
  574. return Seek(offset);
  575. }
  576. } else {
  577. size_t offset;
  578. SplitFilename(rxfilename, &filename_, &offset);
  579. binary_ = binary;
  580. is_.open(MapOsPath(filename_).c_str(),
  581. binary ? std::ios_base::in | std::ios_base::binary
  582. : std::ios_base::in);
  583. if (!is_.is_open())
  584. return false;
  585. else
  586. return Seek(offset);
  587. }
  588. }
  589. virtual std::istream& Stream() {
  590. if (!is_.is_open())
  591. KALDI_ERR << "FileInputImpl::Stream(), file is not open.";
  592. // I believe this error can only arise from coding error.
  593. return is_;
  594. }
  595. virtual int32 Close() {
  596. if (!is_.is_open())
  597. KALDI_ERR << "FileInputImpl::Close(), file is not open.";
  598. // I believe this error can only arise from coding error.
  599. is_.close();
  600. // Don't check status.
  601. return 0;
  602. }
  603. virtual InputType MyType() { return kOffsetFileInput; }
  604. virtual ~OffsetFileInputImpl() {
  605. // Stream will automatically be closed, and we don't care about
  606. // whether it fails.
  607. }
  608. private:
  609. std::string filename_; // the actual filename
  610. bool binary_; // true if was opened in binary mode.
  611. std::ifstream is_;
  612. };
  613. Output::Output(const std::string& wxfilename, bool binary, bool write_header)
  614. : impl_(NULL) {
  615. if (!Open(wxfilename, binary, write_header)) {
  616. if (impl_) {
  617. delete impl_;
  618. impl_ = NULL;
  619. }
  620. KALDI_ERR << "Error opening output stream "
  621. << PrintableWxfilename(wxfilename);
  622. }
  623. }
  624. bool Output::Close() {
  625. if (!impl_) {
  626. return false; // error to call Close if not open.
  627. } else {
  628. bool ans = impl_->Close();
  629. delete impl_;
  630. impl_ = NULL;
  631. return ans;
  632. }
  633. }
  634. Output::~Output() {
  635. if (impl_) {
  636. bool ok = impl_->Close();
  637. delete impl_;
  638. impl_ = NULL;
  639. if (!ok)
  640. KALDI_ERR << "Error closing output file "
  641. << PrintableWxfilename(filename_)
  642. << (ClassifyWxfilename(filename_) == kFileOutput
  643. ? " (disk full?)"
  644. : "");
  645. }
  646. }
  647. std::ostream& Output::Stream() { // will throw if not open; else returns
  648. // stream.
  649. if (!impl_) KALDI_ERR << "Output::Stream() called but not open.";
  650. return impl_->Stream();
  651. }
  652. bool Output::Open(const std::string& wxfn, bool binary, bool header) {
  653. if (IsOpen()) {
  654. if (!Close()) { // Throw here rather than return status, as it's an error
  655. // about something else: if the user wanted to avoid the exception he/she
  656. // could have called Close().
  657. KALDI_ERR << "Output::Open(), failed to close output stream: "
  658. << PrintableWxfilename(filename_);
  659. }
  660. }
  661. filename_ = wxfn;
  662. OutputType type = ClassifyWxfilename(wxfn);
  663. KALDI_ASSERT(impl_ == NULL);
  664. if (type == kFileOutput) {
  665. impl_ = new FileOutputImpl();
  666. } else if (type == kStandardOutput) {
  667. impl_ = new StandardOutputImpl();
  668. } else if (type == kPipeOutput) {
  669. impl_ = new PipeOutputImpl();
  670. } else { // type == kNoOutput
  671. KALDI_WARN << "Invalid output filename format "
  672. << PrintableWxfilename(wxfn);
  673. return false;
  674. }
  675. if (!impl_->Open(wxfn, binary)) {
  676. delete impl_;
  677. impl_ = NULL;
  678. return false; // failed to open.
  679. } else { // successfully opened it.
  680. if (header) {
  681. InitKaldiOutputStream(impl_->Stream(), binary);
  682. bool ok = impl_->Stream().good(); // still OK?
  683. if (!ok) {
  684. delete impl_;
  685. impl_ = NULL;
  686. return false;
  687. }
  688. return true;
  689. } else {
  690. return true;
  691. }
  692. }
  693. }
  694. Input::Input(const std::string& rxfilename, bool* binary) : impl_(NULL) {
  695. if (!Open(rxfilename, binary)) {
  696. KALDI_ERR << "Error opening input stream "
  697. << PrintableRxfilename(rxfilename);
  698. }
  699. }
  700. int32 Input::Close() {
  701. if (impl_) {
  702. int32 ans = impl_->Close();
  703. delete impl_;
  704. impl_ = NULL;
  705. return ans;
  706. } else {
  707. return 0;
  708. }
  709. }
  710. bool Input::OpenInternal(const std::string& rxfilename, bool file_binary,
  711. bool* contents_binary) {
  712. InputType type = ClassifyRxfilename(rxfilename);
  713. if (IsOpen()) {
  714. // May have to close the stream first.
  715. if (type == kOffsetFileInput && impl_->MyType() == kOffsetFileInput) {
  716. // We want to use the same object to Open... this is in case
  717. // the files are the same, so we can just seek.
  718. if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode--
  719. // always open in binary.
  720. delete impl_;
  721. impl_ = NULL;
  722. return false;
  723. }
  724. // read the binary header, if requested.
  725. if (contents_binary != NULL)
  726. return InitKaldiInputStream(impl_->Stream(), contents_binary);
  727. else
  728. return true;
  729. } else {
  730. Close();
  731. // and fall through to code below which actually opens the file.
  732. }
  733. }
  734. if (type == kFileInput) {
  735. impl_ = new FileInputImpl();
  736. } else if (type == kStandardInput) {
  737. impl_ = new StandardInputImpl();
  738. } else if (type == kPipeInput) {
  739. impl_ = new PipeInputImpl();
  740. } else if (type == kOffsetFileInput) {
  741. impl_ = new OffsetFileInputImpl();
  742. } else { // type == kNoInput
  743. KALDI_WARN << "Invalid input filename format "
  744. << PrintableRxfilename(rxfilename);
  745. return false;
  746. }
  747. if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode--
  748. // always read in binary.
  749. delete impl_;
  750. impl_ = NULL;
  751. return false;
  752. }
  753. if (contents_binary != NULL)
  754. return InitKaldiInputStream(impl_->Stream(), contents_binary);
  755. else
  756. return true;
  757. }
  758. Input::~Input() {
  759. if (impl_) Close();
  760. }
  761. std::istream& Input::Stream() {
  762. if (!IsOpen()) KALDI_ERR << "Input::Stream(), not open.";
  763. return impl_->Stream();
  764. }
  765. // template <> void ReadKaldiObject(const std::string &filename,
  766. // Matrix<float> *m) {
  767. // if (!filename.empty() && filename[filename.size() - 1] == ']') {
  768. // // This filename seems to have a 'range'... like foo.ark:4312423[20:30].
  769. // // (the bit in square brackets is the range).
  770. // std::string rxfilename, range;
  771. // if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) {
  772. // KALDI_ERR << "Could not make sense of possible range specifier in
  773. // filename "
  774. // << "while reading matrix: " << filename;
  775. // }
  776. // Matrix<float> temp;
  777. // bool binary_in;
  778. // Input ki(rxfilename, &binary_in);
  779. // temp.Read(ki.Stream(), binary_in);
  780. // if (!ExtractObjectRange(temp, range, m)) {
  781. // KALDI_ERR << "Error extracting range of object: " << filename;
  782. // }
  783. // } else {
  784. // // The normal case, there is no range.
  785. // bool binary_in;
  786. // Input ki(filename, &binary_in);
  787. // m->Read(ki.Stream(), binary_in);
  788. // }
  789. // }
  790. //
  791. // template <> void ReadKaldiObject(const std::string &filename,
  792. // Matrix<double> *m) {
  793. // if (!filename.empty() && filename[filename.size() - 1] == ']') {
  794. // // This filename seems to have a 'range'... like foo.ark:4312423[20:30].
  795. // // (the bit in square brackets is the range).
  796. // std::string rxfilename, range;
  797. // if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) {
  798. // KALDI_ERR << "Could not make sense of possible range specifier in
  799. // filename "
  800. // << "while reading matrix: " << filename;
  801. // }
  802. // Matrix<double> temp;
  803. // bool binary_in;
  804. // Input ki(rxfilename, &binary_in);
  805. // temp.Read(ki.Stream(), binary_in);
  806. // if (!ExtractObjectRange(temp, range, m)) {
  807. // KALDI_ERR << "Error extracting range of object: " << filename;
  808. // }
  809. // } else {
  810. // // The normal case, there is no range.
  811. // bool binary_in;
  812. // Input ki(filename, &binary_in);
  813. // m->Read(ki.Stream(), binary_in);
  814. // }
  815. // }
  816. } // end namespace kaldi