You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

146 lines
5.6 KiB

  1. // util/hash-list.h
  2. // Copyright 2009-2011 Microsoft Corporation
  3. // 2013 Johns Hopkins University (author: Daniel Povey)
  4. // See ../../COPYING for clarification regarding multiple authors
  5. //
  6. // Licensed under the Apache License, Version 2.0 (the "License");
  7. // you may not use this file except in compliance with the License.
  8. // You may obtain a copy of the License at
  9. //
  10. // http://www.apache.org/licenses/LICENSE-2.0
  11. //
  12. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  13. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  14. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  15. // MERCHANTABLITY OR NON-INFRINGEMENT.
  16. // See the Apache 2 License for the specific language governing permissions and
  17. // limitations under the License.
  18. #ifndef KALDI_UTIL_HASH_LIST_H_
  19. #define KALDI_UTIL_HASH_LIST_H_
  20. #include <algorithm>
  21. #include <cassert>
  22. #include <limits>
  23. #include <set>
  24. #include <vector>
  25. #include "base/kaldi-error.h"
  26. /* This header provides utilities for a structure that's used in a decoder (but
  27. is quite generic in nature so we implement and test it separately).
  28. Basically it's a singly-linked list, but implemented in such a way that we
  29. can quickly search for elements in the list. We give it a slightly richer
  30. interface than just a hash and a list. The idea is that we want to separate
  31. the hash part and the list part: basically, in the decoder, we want to have a
  32. single hash for the current frame and the next frame, because by the time we
  33. need to access the hash for the next frame we no longer need the hash for the
  34. previous frame. So we have an operation that clears the hash but leaves the
  35. list structure intact. We also control memory management inside this object,
  36. to avoid repeated new's/deletes.
  37. See hash-list-test.cc for an example of how to use this object.
  38. */
  39. namespace kaldi {
  40. template <class I, class T>
  41. class HashList {
  42. public:
  43. struct Elem {
  44. I key;
  45. T val;
  46. Elem* tail;
  47. };
  48. /// Constructor takes no arguments.
  49. /// Call SetSize to inform it of the likely size.
  50. HashList();
  51. /// Clears the hash and gives the head of the current list to the user;
  52. /// ownership is transferred to the user (the user must call Delete()
  53. /// for each element in the list, at his/her leisure).
  54. Elem* Clear();
  55. /// Gives the head of the current list to the user. Ownership retained in the
  56. /// class. Caution: in December 2013 the return type was changed to const
  57. /// Elem* and this function was made const. You may need to change some types
  58. /// of local Elem* variables to const if this produces compilation errors.
  59. const Elem* GetList() const;
  60. /// Think of this like delete(). It is to be called for each Elem in turn
  61. /// after you "obtained ownership" by doing Clear(). This is not the opposite
  62. /// of. Insert, it is the opposite of New. It's really a memory operation.
  63. inline void Delete(Elem* e);
  64. /// This should probably not be needed to be called directly by the user.
  65. /// Think of it as opposite
  66. /// to Delete();
  67. inline Elem* New();
  68. /// Find tries to find this element in the current list using the hashtable.
  69. /// It returns NULL if not present. The Elem it returns is not owned by the
  70. /// user, it is part of the internal list owned by this object, but the user
  71. /// is free to modify the "val" element.
  72. inline Elem* Find(I key);
  73. /// Insert inserts a new element into the hashtable/stored list.
  74. /// Because element keys in a hashtable are unique, this operation checks
  75. /// whether each inserted element has a key equivalent to the one of an
  76. /// element already in the hashtable. If so, the element is not inserted,
  77. /// returning an pointer to this existing element.
  78. inline Elem* Insert(I key, T val);
  79. /// Insert inserts another element with same key into the hashtable/
  80. /// stored list.
  81. /// By calling this, the user asserts that one element with that key is
  82. /// already present.
  83. /// We insert it that way, that all elements with the same key
  84. /// follow each other.
  85. /// Find() will return the first one of the elements with the same key.
  86. inline void InsertMore(I key, T val);
  87. /// SetSize tells the object how many hash buckets to allocate (should
  88. /// typically be at least twice the number of objects we expect to go in the
  89. /// structure, for fastest performance). It must be called while the hash
  90. /// is empty (e.g. after Clear() or after initializing the object, but before
  91. /// adding anything to the hash.
  92. void SetSize(size_t sz);
  93. /// Returns current number of hash buckets.
  94. inline size_t Size() { return hash_size_; }
  95. ~HashList();
  96. private:
  97. struct HashBucket {
  98. size_t prev_bucket; // index to next bucket (-1 if list tail). Note:
  99. // list of buckets goes in opposite direction to list of Elems.
  100. Elem* last_elem; // pointer to last element in this bucket (NULL if empty)
  101. inline HashBucket(size_t i, Elem* e) : prev_bucket(i), last_elem(e) {}
  102. };
  103. Elem* list_head_; // head of currently stored list.
  104. size_t bucket_list_tail_; // tail of list of active hash buckets.
  105. size_t hash_size_; // number of hash buckets.
  106. std::vector<HashBucket> buckets_;
  107. Elem* freed_head_; // head of list of currently freed elements. [ready for
  108. // allocation]
  109. std::vector<Elem*> allocated_; // list of allocated blocks.
  110. static const size_t allocate_block_size_ = 1024; // Number of Elements to
  111. // allocate in one block. Must be largish so storing allocated_ doesn't
  112. // become a problem.
  113. };
  114. } // end namespace kaldi
  115. #include "util/hash-list-inl.h"
  116. #endif // KALDI_UTIL_HASH_LIST_H_