mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
RefCountingHashMap.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <mutable/util/fn.hpp>
5#include <algorithm>
6#include <cassert>
7#include <cmath>
8#include <cstdint>
9#include <cstdlib>
10#include <functional>
11#include <iomanip>
12#include <iostream>
13#include <memory>
14#include <stdexcept>
15#include <utility>
16
17
18namespace m {
19
20
21/*======================================================================================================================
22 * This class implements an open addressing hash map that uses reference counting for fast collision resolution.
23 *====================================================================================================================*/
24
25template<
26 typename Key,
27 typename Value,
28 typename Hash = std::hash<Key>,
29 typename KeyEqual = std::equal_to<Key>
30>
32{
33 using key_type = Key;
35 using value_type = std::pair<const Key, Value>;
36 using hasher = Hash;
37 using key_equal = KeyEqual;
39 using const_pointer = const value_type*;
42 using size_type = std::size_t;
43 using difference_type = std::ptrdiff_t;
44
45 private:
47 {
49 uint32_t probe_length = 0;
52 };
53
54 template<bool C>
56 {
57 friend struct RefCountingHashMap;
58
59 static constexpr bool Is_Const = C;
60
61 using map_type = std::conditional_t<Is_Const, const RefCountingHashMap, RefCountingHashMap>;
62 using bucket_type = std::conditional_t<Is_Const, const typename map_type::entry_type, typename map_type::entry_type>;
63 using pointer = std::conditional_t<Is_Const, const typename map_type::const_pointer, typename map_type::pointer>;
64 using reference = std::conditional_t<Is_Const, const typename map_type::const_reference, typename map_type::reference>;
65
66 private:
68 bucket_type *bucket_ = nullptr;
69
70 public:
72
74 do {
75 ++bucket_;
76 } while (bucket_ != map_.table_ + map_.capacity_ and bucket_->probe_length == 0);
77 return *this;
78 }
79
81 the_iterator tmp = *this;
82 operator++();
83 return tmp;
84 }
85
86 reference operator*() const { return bucket_->value; }
87 pointer operator->() const { return &bucket_->value; }
88
89 bool operator==(the_iterator other) const { return this->bucket_ == other.bucket_; }
90 bool operator!=(the_iterator other) const { return not operator==(other); }
91 };
92
93 template<bool C>
95 {
96 friend struct RefCountingHashMap;
97
98 static constexpr bool Is_Const = C;
99
100 using map_type = std::conditional_t<Is_Const, const RefCountingHashMap, RefCountingHashMap>;
101 using bucket_type = std::conditional_t<Is_Const, const typename map_type::entry_type, typename map_type::entry_type>;
102 using pointer = std::conditional_t<Is_Const, const typename map_type::const_pointer, typename map_type::pointer>;
103 using reference = std::conditional_t<Is_Const, const typename map_type::const_reference, typename map_type::reference>;
104 using size_type = typename map_type::size_type;
105
106 private:
111
112 public:
114 : map_(map)
116 {
117 max_step_ = bucket()->probe_length;
118 }
119
120 bool has_next() const { return step_ < max_step_; }
121
123 ++step_;
125 return *this;
126 }
127
129 the_bucket_iterator tmp = *this;
130 operator++();
131 return tmp;
132 }
133
134 reference operator*() const { return map_.table_[bucket_index_].value; }
135 pointer operator->() const { return &map_.table_[bucket_index_].value; }
136
137 size_type probe_length() const { return step_; }
138 size_type probe_distance() const { return (step_ * (step_ + 1)) / 2; }
140 size_type bucket_index() const { return map_.masked(current_index() - probe_distance()); }
141
142 private:
143 bucket_type * bucket() const { return map_.table_ + current_index(); }
144 };
145
146 public:
149
152
153 private:
154 const hasher h_;
156
158 entry_type *table_ = nullptr;
166 float max_load_factor_ = .85;
167
168 public:
170 const hasher &hash = hasher(),
171 const key_equal &equal = key_equal())
172 : h_(hash)
173 , eq_(equal)
174 , capacity_(ceil_to_pow_2(bucket_count))
175 {
176 if (bucket_count == 0)
177 throw std::invalid_argument("bucket_count must not be zero");
178
179 /* Allocate and initialize table. */
181 initialize();
182
183 /* Compute high watermark. */
185 }
186
188 for (auto p = table_, end = table_ + capacity_; p != end; ++p) {
189 if (p->probe_length != 0)
190 p->~entry_type();
191 }
192 free(table_);
193 }
194
195 size_type capacity() const { return capacity_; }
196 size_type size() const { return size_; }
197 size_type mask() const { return capacity_ - size_type(1); }
198 size_type masked(size_type index) const { return index & mask(); }
199 float max_load_factor() const { return max_load_factor_; }
200 void max_load_factor(float ml) {
201 max_load_factor_ = std::clamp(ml, .0f, .99f);
203 }
205
206 iterator begin() { return iterator(*this, table_); }
207 iterator end() { return iterator(*this, table_ + capacity()); }
208 const_iterator begin() const { return const_iterator(*this, table_); }
209 const_iterator end() const { return const_iterator(*this, table_ + capacity()); }
210 const_iterator cbegin() const { return begin(); }
211 const_iterator cend() const { return end(); }
212
214 if (size_ >= watermark_high_)
215 resize(2 * capacity_);
216
217 const auto hash = h_(key);
218 const size_type index = masked(hash);
219 entry_type * const bucket = table_ + index;
220
221 if (bucket->probe_length == 0) [[likely]] { // bucket is free
223 new (&bucket->value) value_type(std::move(key), std::move(value));
224 ++size_;
225 return iterator(*this, bucket);
226 }
227
228 /* Compute distance to end of probe sequence. */
230 M_insist(distance > 0, "the distance must not be 0, otherwise we would have run into the likely case above");
231
232 /* Search next free slot in bucket's probe sequence. */
233 entry_type *probe = table_ + masked(index + distance);
234 M_insist(probe != bucket, "the probed slot must not be the original bucket as the distance is not 0 and always "
235 "less than capacity");
236 while (probe->probe_length != 0) {
238 distance += bucket->probe_length;
239 probe = table_ + masked(index + distance);
240 }
241
242 /* Found free slot in bucket's probe sequence. Place element in slot and update probe length. */
243 ++probe->probe_length; // set probe_length from 0 to 1
245 new (&probe->value) value_type(std::move(key), std::move(value));
246 ++size_;
247 return iterator(*this, probe);
248 }
249
250 std::pair<iterator, bool> insert_without_duplicates(key_type key, mapped_type value) {
251 if (size_ >= watermark_high_)
252 resize(2 * capacity_);
253
254 const auto hash = h_(key);
255 const size_type index = masked(hash);
256 entry_type * const bucket = table_ + index;
257
258 entry_type *probe = bucket;
259 size_type insertion_probe_length = 0;
260 size_type insertion_probe_distance = 0;
261 while (probe->probe_length != 0) {
262 if (eq_(key, probe->value.first))
263 return std::make_pair(iterator(*this, probe), false); // duplicate key
264 ++insertion_probe_length;
265 insertion_probe_distance += insertion_probe_length;
266 probe = table_ + masked(index + insertion_probe_distance);
267 }
268
269 ++probe->probe_length; // set probe_length from 0 to 1
270 bucket->probe_length = insertion_probe_length + 1;
271 new (&probe->value) value_type(std::move(key), std::move(value));
272 ++size_;
273 return std::make_pair(iterator(*this, probe), true);
274 }
275
276 iterator find(const key_type &key) {
277 const auto hash = h_(key);
278 const size_type index = masked(hash);
279 entry_type * const bucket = table_ + index;
280
281#if 1
282 /* Search the probe sequence in natural order. */
283 entry_type *probe = bucket;
284 size_type lookup_probe_length = 0;
285 size_type lookup_probe_distance = 0;
286 while (probe->probe_length != 0 and lookup_probe_length < bucket->probe_length) {
287 if (eq_(key, probe->value.first))
288 return iterator(*this, probe);
289 ++lookup_probe_length;
290 lookup_probe_distance += lookup_probe_length;
291 probe = table_ + masked(index + lookup_probe_distance);
292 }
293#else
294 /* Search the probe sequence in inversed order, starting at the last element of this bucket. */
295 size_type lookup_probe_length = bucket->probe_length;
296 size_type lookup_probe_distance = (lookup_probe_length * (lookup_probe_length - 1)) >> 1;
297 while (lookup_probe_length != 0) {
298 entry_type *probe = table_ + masked(index + lookup_probe_distance);
299 if (eq_(key, probe->value.first))
300 return iterator(*this, probe);
301 --lookup_probe_length;
302 lookup_probe_distance -= lookup_probe_length;
303 }
304#endif
305 return end();
306 }
307 const_iterator find(const key_type &key) const {
308 return const_iterator(*this, const_cast<RefCountingHashMap*>(this)->find(key).bucket_);
309 }
310
312 const auto hash = h_(key);
313 const size_type index = masked(hash);
314 return bucket_iterator(*this, index);
315 }
317 return const_bucket_iterator(*this, const_cast<RefCountingHashMap*>(this)->bucket(key).bucket_index_);
318 }
319
320 void for_all(const key_type &key, std::function<void(value_type&)> callback) {
321 for (auto it = bucket(key); it.has_next(); ++it) {
322 if (eq_(key, it->first))
323 callback(*it);
324 }
325 }
326 void for_all(const key_type &key, std::function<void(const value_type&)> callback) const {
327 for (auto it = bucket(key); it.has_next(); ++it) {
328 if (eq_(key, it->first))
329 callback(*it);
330 }
331 }
332
333 size_type count(const key_type &key) const {
334 size_type cnt = 0;
335 for_all(key, [&cnt](auto) { ++cnt; });
336 return cnt;
337 }
338
340 private:
341 void rehash(std::size_t new_capacity) {
342 M_insist((new_capacity & (new_capacity - 1)) == 0, "not a power of 2");
343 M_insist(size_ <= watermark_high_, "there are more elements to rehash than the high watermark allows");
344
345 auto old_table = table_;
346 table_ = allocate(new_capacity);
347 auto old_capacity = capacity_;
348 capacity_ = new_capacity;
349 size_ = 0;
350 initialize();
351
352 for (auto runner = old_table, end = old_table + old_capacity; runner != end; ++runner) {
353 if (runner->probe_length) {
354 auto &key_ref = const_cast<key_type&>(runner->value.first); // hack around the `const key_type`
355 insert_with_duplicates(std::move(key_ref), std::move(runner->value.second));
356 }
357 }
358
359 free(old_table);
360 }
361
362 public:
364
365 void resize(std::size_t new_capacity) {
366 new_capacity = std::max<decltype(new_capacity)>(new_capacity, std::ceil(size() / max_load_factor()));
367 new_capacity = ceil_to_pow_2(new_capacity);
368
369 if (new_capacity != capacity_) {
370 watermark_high_ = new_capacity * max_load_factor();
372 rehash(new_capacity);
373 }
374 }
375
376 void shrink_to_fit() { resize(size()); }
377
379 friend std::ostream & operator<<(std::ostream &out, const RefCountingHashMap &map) {
380 size_type log2 = log2_ceil(map.capacity());
381 size_type log10 = size_type(std::ceil(double(log2) / 3.322));
382 for (size_type i = 0; i != map.capacity_; ++i) {
383 auto &entry = map.table_[i];
384 out << '[' << std::setw(log10) << i << "]: probe length = " << std::setw(log10) << entry.probe_length;
385 if (entry.probe_length) {
386 out << ", value = (" << entry.value.first << ", " << entry.value.second << ')';
387 }
388 out << '\n';
389 }
390 return out;
391 }
392
393 void dump(std::ostream &out) const { out << *this; out.flush(); }
394 void dump() const { dump(std::cerr); }
396
397 private:
398 static entry_type * allocate(size_type n, entry_type *hint = nullptr) {
399 auto p = static_cast<entry_type*>(realloc(hint, n * sizeof(entry_type)));
400 if (p == nullptr)
401 throw std::runtime_error("allocation failed");
402 return p;
403 }
404
405 void initialize() {
406 for (auto runner = table_, end = table_ + capacity_; runner != end; ++runner)
407 new (runner) entry_type();
408 }
409};
410
411}
Check whether.
Definition: concepts.hpp:39
#define M_insist(...)
Definition: macro.hpp:129
‍mutable namespace
Definition: Backend.hpp:10
bool M_EXPORT equal(const T &first, const U &second)
Checks whether first and second are equal considering permutations.
Definition: fn.hpp:391
and
Definition: enum_ops.hpp:12
uint32_t probe_length
Counts the length of the probe sequence.
value_type value
The value stored in this entry.
std::conditional_t< Is_Const, const typename map_type::entry_type, typename map_type::entry_type > bucket_type
the_bucket_iterator(map_type &map, size_type bucket_index)
std::conditional_t< Is_Const, const RefCountingHashMap, RefCountingHashMap > map_type
std::conditional_t< Is_Const, const typename map_type::const_reference, typename map_type::reference > reference
std::conditional_t< Is_Const, const RefCountingHashMap, RefCountingHashMap > map_type
std::conditional_t< Is_Const, const typename map_type::entry_type, typename map_type::entry_type > bucket_type
std::conditional_t< Is_Const, const typename map_type::const_reference, typename map_type::reference > reference
bool operator==(the_iterator other) const
the_iterator(map_type &map, entry_type *bucket)
bool operator!=(the_iterator other) const
const_iterator begin() const
size_type masked(size_type index) const
const_iterator end() const
the_iterator< false > iterator
void rehash(std::size_t new_capacity)
Rehash all elements.
const_bucket_iterator bucket(const key_type &key) const
const_iterator cend() const
const value_type & const_reference
size_type watermark_high() const
the_bucket_iterator< false > bucket_iterator
M_LCOV_EXCL_START friend std::ostream & operator<<(std::ostream &out, const RefCountingHashMap &map)
void dump(std::ostream &out) const
size_type count(const key_type &key) const
size_type capacity() const
static entry_type * allocate(size_type n, entry_type *hint=nullptr)
std::pair< iterator, bool > insert_without_duplicates(key_type key, mapped_type value)
std::pair< const Key, Value > value_type
size_type capacity_
The total number of entries allocated in the table.
size_type size_
The number of occupied entries in the table.
std::ptrdiff_t difference_type
const_iterator find(const key_type &key) const
const_iterator cbegin() const
iterator insert_with_duplicates(key_type key, mapped_type value)
float max_load_factor_
The maximum load factor before resizing.
bucket_iterator bucket(const key_type &key)
the_bucket_iterator< true > const_bucket_iterator
RefCountingHashMap(size_type bucket_count, const hasher &hash=hasher(), const key_equal &equal=key_equal())
the_iterator< true > const_iterator
void for_all(const key_type &key, std::function< void(const value_type &)> callback) const
iterator find(const key_type &key)
size_type watermark_high_
The maximum size before resizing.
entry_type * table_
A pointer to the beginning of the table.
void resize(std::size_t new_capacity)
const value_type * const_pointer
void for_all(const key_type &key, std::function< void(value_type &)> callback)
This class holds a SQL attribute value.
Definition: Tuple.hpp:19