You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

329 lines
11 KiB

  1. // base/io-funcs-inl.h
  2. // Copyright 2009-2011 Microsoft Corporation; Saarland University;
  3. // Jan Silovsky; Yanmin Qian;
  4. // Johns Hopkins University (Author: Daniel Povey)
  5. // 2016 Xiaohui Zhang
  6. // See ../../COPYING for clarification regarding multiple authors
  7. //
  8. // Licensed under the Apache License, Version 2.0 (the "License");
  9. // you may not use this file except in compliance with the License.
  10. // You may obtain a copy of the License at
  11. // http://www.apache.org/licenses/LICENSE-2.0
  12. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  13. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  14. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  15. // MERCHANTABILITY OR NON-INFRINGEMENT.
  16. // See the Apache 2 License for the specific language governing permissions and
  17. // limitations under the License.
  18. #ifndef KALDI_BASE_IO_FUNCS_INL_H_
  19. #define KALDI_BASE_IO_FUNCS_INL_H_ 1
  20. // Do not include this file directly. It is included by base/io-funcs.h
  21. #include <limits>
  22. #include <utility>
  23. #include <vector>
  24. namespace kaldi {
  25. // Template that covers integers.
  26. template <class T>
  27. void WriteBasicType(std::ostream& os, bool binary, T t) {
  28. // Compile time assertion that this is not called with a wrong type.
  29. KALDI_ASSERT_IS_INTEGER_TYPE(T);
  30. if (binary) {
  31. char len_c = (std::numeric_limits<T>::is_signed ? 1 : -1) *
  32. static_cast<char>(sizeof(t));
  33. os.put(len_c);
  34. os.write(reinterpret_cast<const char*>(&t), sizeof(t));
  35. } else {
  36. if (sizeof(t) == 1)
  37. os << static_cast<int16>(t) << " ";
  38. else
  39. os << t << " ";
  40. }
  41. if (os.fail()) {
  42. KALDI_ERR << "Write failure in WriteBasicType.";
  43. }
  44. }
  45. // Template that covers integers.
  46. template <class T>
  47. inline void ReadBasicType(std::istream& is, bool binary, T* t) {
  48. KALDI_PARANOID_ASSERT(t != NULL);
  49. // Compile time assertion that this is not called with a wrong type.
  50. KALDI_ASSERT_IS_INTEGER_TYPE(T);
  51. if (binary) {
  52. int len_c_in = is.get();
  53. if (len_c_in == -1)
  54. KALDI_ERR << "ReadBasicType: encountered end of stream.";
  55. char len_c = static_cast<char>(len_c_in),
  56. len_c_expected = (std::numeric_limits<T>::is_signed ? 1 : -1) *
  57. static_cast<char>(sizeof(*t));
  58. if (len_c != len_c_expected) {
  59. KALDI_ERR << "ReadBasicType: did not get expected integer type, "
  60. << static_cast<int>(len_c) << " vs. "
  61. << static_cast<int>(len_c_expected)
  62. << ". You can change this code to successfully"
  63. << " read it later, if needed.";
  64. // insert code here to read "wrong" type. Might have a switch statement.
  65. }
  66. is.read(reinterpret_cast<char*>(t), sizeof(*t));
  67. } else {
  68. if (sizeof(*t) == 1) {
  69. int16 i;
  70. is >> i;
  71. *t = i;
  72. } else {
  73. is >> *t;
  74. }
  75. }
  76. if (is.fail()) {
  77. KALDI_ERR << "Read failure in ReadBasicType, file position is "
  78. << is.tellg() << ", next char is " << is.peek();
  79. }
  80. }
  81. // Template that covers integers.
  82. template <class T>
  83. inline void WriteIntegerPairVector(std::ostream& os, bool binary,
  84. const std::vector<std::pair<T, T> >& v) {
  85. // Compile time assertion that this is not called with a wrong type.
  86. KALDI_ASSERT_IS_INTEGER_TYPE(T);
  87. if (binary) {
  88. char sz = sizeof(T); // this is currently just a check.
  89. os.write(&sz, 1);
  90. int32 vecsz = static_cast<int32>(v.size());
  91. KALDI_ASSERT((size_t)vecsz == v.size());
  92. os.write(reinterpret_cast<const char*>(&vecsz), sizeof(vecsz));
  93. if (vecsz != 0) {
  94. os.write(reinterpret_cast<const char*>(&(v[0])), sizeof(T) * vecsz * 2);
  95. }
  96. } else {
  97. // focus here is on prettiness of text form rather than
  98. // efficiency of reading-in.
  99. // reading-in is dominated by low-level operations anyway:
  100. // for efficiency use binary.
  101. os << "[ ";
  102. typename std::vector<std::pair<T, T> >::const_iterator iter = v.begin(),
  103. end = v.end();
  104. for (; iter != end; ++iter) {
  105. if (sizeof(T) == 1)
  106. os << static_cast<int16>(iter->first) << ','
  107. << static_cast<int16>(iter->second) << ' ';
  108. else
  109. os << iter->first << ',' << iter->second << ' ';
  110. }
  111. os << "]\n";
  112. }
  113. if (os.fail()) {
  114. KALDI_ERR << "Write failure in WriteIntegerPairVector.";
  115. }
  116. }
  117. // Template that covers integers.
  118. template <class T>
  119. inline void ReadIntegerPairVector(std::istream& is, bool binary,
  120. std::vector<std::pair<T, T> >* v) {
  121. KALDI_ASSERT_IS_INTEGER_TYPE(T);
  122. KALDI_ASSERT(v != NULL);
  123. if (binary) {
  124. int sz = is.peek();
  125. if (sz == sizeof(T)) {
  126. is.get();
  127. } else { // this is currently just a check.
  128. KALDI_ERR << "ReadIntegerPairVector: expected to see type of size "
  129. << sizeof(T) << ", saw instead " << sz << ", at file position "
  130. << is.tellg();
  131. }
  132. int32 vecsz;
  133. is.read(reinterpret_cast<char*>(&vecsz), sizeof(vecsz));
  134. if (is.fail() || vecsz < 0) goto bad;
  135. v->resize(vecsz);
  136. if (vecsz > 0) {
  137. is.read(reinterpret_cast<char*>(&((*v)[0])), sizeof(T) * vecsz * 2);
  138. }
  139. } else {
  140. std::vector<std::pair<T, T> > tmp_v; // use temporary so v doesn't use
  141. // extra memory due to resizing.
  142. is >> std::ws;
  143. if (is.peek() != static_cast<int>('[')) {
  144. KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw " << is.peek()
  145. << ", at file position " << is.tellg();
  146. }
  147. is.get(); // consume the '['.
  148. is >> std::ws; // consume whitespace.
  149. while (is.peek() != static_cast<int>(']')) {
  150. if (sizeof(T) == 1) { // read/write chars as numbers.
  151. int16 next_t1, next_t2;
  152. is >> next_t1;
  153. if (is.fail()) goto bad;
  154. if (is.peek() != static_cast<int>(','))
  155. KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
  156. << is.peek() << ", at file position " << is.tellg();
  157. is.get(); // consume the ','.
  158. is >> next_t2 >> std::ws;
  159. if (is.fail())
  160. goto bad;
  161. else
  162. tmp_v.push_back(std::make_pair((T)next_t1, (T)next_t2));
  163. } else {
  164. T next_t1, next_t2;
  165. is >> next_t1;
  166. if (is.fail()) goto bad;
  167. if (is.peek() != static_cast<int>(','))
  168. KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
  169. << is.peek() << ", at file position " << is.tellg();
  170. is.get(); // consume the ','.
  171. is >> next_t2 >> std::ws;
  172. if (is.fail())
  173. goto bad;
  174. else
  175. tmp_v.push_back(std::pair<T, T>(next_t1, next_t2));
  176. }
  177. }
  178. is.get(); // get the final ']'.
  179. *v = tmp_v; // could use std::swap to use less temporary memory, but this
  180. // uses less permanent memory.
  181. }
  182. if (!is.fail()) return;
  183. bad:
  184. KALDI_ERR << "ReadIntegerPairVector: read failure at file position "
  185. << is.tellg();
  186. }
  187. template <class T>
  188. inline void WriteIntegerVector(std::ostream& os, bool binary,
  189. const std::vector<T>& v) {
  190. // Compile time assertion that this is not called with a wrong type.
  191. KALDI_ASSERT_IS_INTEGER_TYPE(T);
  192. if (binary) {
  193. char sz = sizeof(T); // this is currently just a check.
  194. os.write(&sz, 1);
  195. int32 vecsz = static_cast<int32>(v.size());
  196. KALDI_ASSERT((size_t)vecsz == v.size());
  197. os.write(reinterpret_cast<const char*>(&vecsz), sizeof(vecsz));
  198. if (vecsz != 0) {
  199. os.write(reinterpret_cast<const char*>(&(v[0])), sizeof(T) * vecsz);
  200. }
  201. } else {
  202. // focus here is on prettiness of text form rather than
  203. // efficiency of reading-in.
  204. // reading-in is dominated by low-level operations anyway:
  205. // for efficiency use binary.
  206. os << "[ ";
  207. typename std::vector<T>::const_iterator iter = v.begin(), end = v.end();
  208. for (; iter != end; ++iter) {
  209. if (sizeof(T) == 1)
  210. os << static_cast<int16>(*iter) << " ";
  211. else
  212. os << *iter << " ";
  213. }
  214. os << "]\n";
  215. }
  216. if (os.fail()) {
  217. KALDI_ERR << "Write failure in WriteIntegerVector.";
  218. }
  219. }
  220. template <class T>
  221. inline void ReadIntegerVector(std::istream& is, bool binary,
  222. std::vector<T>* v) {
  223. KALDI_ASSERT_IS_INTEGER_TYPE(T);
  224. KALDI_ASSERT(v != NULL);
  225. if (binary) {
  226. int sz = is.peek();
  227. if (sz == sizeof(T)) {
  228. is.get();
  229. } else { // this is currently just a check.
  230. KALDI_ERR << "ReadIntegerVector: expected to see type of size "
  231. << sizeof(T) << ", saw instead " << sz << ", at file position "
  232. << is.tellg();
  233. }
  234. int32 vecsz;
  235. is.read(reinterpret_cast<char*>(&vecsz), sizeof(vecsz));
  236. if (is.fail() || vecsz < 0) goto bad;
  237. v->resize(vecsz);
  238. if (vecsz > 0) {
  239. is.read(reinterpret_cast<char*>(&((*v)[0])), sizeof(T) * vecsz);
  240. }
  241. } else {
  242. std::vector<T> tmp_v; // use temporary so v doesn't use extra memory
  243. // due to resizing.
  244. is >> std::ws;
  245. if (is.peek() != static_cast<int>('[')) {
  246. KALDI_ERR << "ReadIntegerVector: expected to see [, saw " << is.peek()
  247. << ", at file position " << is.tellg();
  248. }
  249. is.get(); // consume the '['.
  250. is >> std::ws; // consume whitespace.
  251. while (is.peek() != static_cast<int>(']')) {
  252. if (sizeof(T) == 1) { // read/write chars as numbers.
  253. int16 next_t;
  254. is >> next_t >> std::ws;
  255. if (is.fail())
  256. goto bad;
  257. else
  258. tmp_v.push_back((T)next_t);
  259. } else {
  260. T next_t;
  261. is >> next_t >> std::ws;
  262. if (is.fail())
  263. goto bad;
  264. else
  265. tmp_v.push_back(next_t);
  266. }
  267. }
  268. is.get(); // get the final ']'.
  269. *v = tmp_v; // could use std::swap to use less temporary memory, but this
  270. // uses less permanent memory.
  271. }
  272. if (!is.fail()) return;
  273. bad:
  274. KALDI_ERR << "ReadIntegerVector: read failure at file position "
  275. << is.tellg();
  276. }
  // Prepares an already-opened stream for writing: in binary mode the two
  // header bytes '\0' and 'B' are written, and in either mode the
  // floating-point precision is raised to at least 7 (a bit more than the
  // precision of float).  A precision that is already 7 or more is left as is.
  //
  // No error checking is done on the stream here.
  inline void InitKaldiOutputStream(std::ostream& os, bool binary) {
    if (binary) {
      os.put('\0').put('B');
    }
    // In text mode the precision handling may be revisited at some point.
    const std::streamsize min_precision = 7;
    if (os.precision() < min_precision) {
      os.precision(min_precision);
    }
  }
  /// Prepares an already-opened stream for reading by detecting the binary
  /// header and setting `*binary` accordingly.
  ///
  /// If the next character is not '\0', nothing is consumed, `*binary` is set
  /// to false and true is returned.  If the stream starts with '\0' followed
  /// by 'B', both characters are consumed, `*binary` is set to true and true
  /// is returned.  In the very unusual case that the stream starts with '\0'
  /// but the next character is not 'B', the '\0' is consumed, `*binary` is
  /// left unmodified and false is returned.
  inline bool InitKaldiInputStream(std::istream& is, bool* binary) {
    if (is.peek() != '\0') {  // no binary marker: treat as text.
      *binary = false;
      return true;
    }
    is.get();  // consume the '\0'.
    if (is.peek() != 'B') {
      return false;
    }
    is.get();  // consume the 'B'.
    *binary = true;
    return true;
  }
  309. } // end namespace kaldi.
  310. #endif // KALDI_BASE_IO_FUNCS_INL_H_