mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
Index.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <algorithm>
4#include <cmath>
5#include <cstring>
6#include <iostream>
11#include <utility>
12#include <vector>
13
14
15namespace m {
16
17// Forward declarations
18struct Table;
19struct Schema;
20
21namespace idx {
22
24enum class IndexMethod { Array, Rmi };
25
28{
29 protected:
30 IndexBase() = default;
31
32 public:
33 IndexBase(IndexBase&&) = default;
34 virtual ~IndexBase() { }
35
37 virtual void bulkload(const Table &table, const Schema &key_schema) = 0;
38 /* Returns the number of entries in the index. */
39 virtual std::size_t num_entries() const = 0;
41 virtual IndexMethod method() const = 0;
43 virtual const memory::Memory & memory() const = 0;
44 /* Returns the size of the index in bytes. */
45 virtual std::size_t size_in_bytes() const = 0;
46
47 virtual void dump(std::ostream &out) const = 0;
48 virtual void dump() const = 0;
49
50 protected:
52 static std::string build_query(const Table &table, const Schema &schema);
53};
54
56template<typename Key>
58{
59#ifndef NDEBUG
60 static constexpr std::size_t ALLOCATION_SIZE = 1UL << 30;
61#else
62 static constexpr std::size_t ALLOCATION_SIZE = 1UL << 37;
63#endif
64
65 using key_type = Key;
66 using value_type = std::size_t;
67 using entry_type = std::pair<key_type, value_type>;
69 using const_iterator = const entry_type*;
70
71 protected:
74 std::size_t num_entries_;
76
78 struct
79 {
80 template<typename T, typename U>
81 requires (std::same_as<T, key_type> or std::same_as<T, entry_type>) and
82 (std::same_as<U, key_type> or std::same_as<U, entry_type>)
83 bool operator()(const T &_lhs, const U &_rhs) const {
84 key_type lhs = M_CONSTEXPR_COND((std::same_as<T, key_type>), _lhs, _lhs.first);
85 key_type rhs = M_CONSTEXPR_COND((std::same_as<U, key_type>), _rhs, _rhs.first);
86 if constexpr(std::same_as<key_type, const char*>)
87 return std::strcmp(lhs, rhs) < 0;
88 else
89 return lhs < rhs;
90 }
91 } cmp;
92
93 public:
94 ArrayIndex();
95
99 void bulkload(const Table &table, const Schema &key_schema) override;
100
102 std::size_t num_entries() const override { return num_entries_; }
103
105 IndexMethod method() const override { return IndexMethod::Array; }
106
108 const memory::Memory & memory() const override { return memory_; }
109
110 /* Returns the size of the index in bytes. */
111 std::size_t size_in_bytes() const override { return num_entries_ * sizeof(entry_type); }
112
113 public:
116 void add(const key_type key, const value_type value);
117
119 virtual void finalize() {
120 std::sort(begin(), end(), cmp);
121 finalized_ = true;
122 }
123
125 bool finalized() const { return finalized_; }
126
130 virtual const_iterator lower_bound(const key_type key) const {
131 if (not finalized_) throw m::exception("Index is not finalized.");
132 return std::lower_bound(begin(), end(), entry_type{key, value_type()}, cmp);
133 }
134
138 virtual const_iterator upper_bound(const key_type key) const {
139 if (not finalized_) throw m::exception("Index is not finalized.");
140 return std::upper_bound(begin(), end(), entry_type{key, value_type()}, cmp);
141 }
142
144 protected:
145 iterator begin() { return static_cast<entry_type*>(memory_.addr()); }
146 public:
147 const_iterator begin() const { return static_cast<const entry_type*>(memory_.addr()); }
148 const_iterator cbegin() const { return begin(); }
150 protected:
151 iterator end() { return static_cast<entry_type*>(memory_.addr()) + num_entries_; }
152 public:
153 const_iterator end() const { return static_cast<const entry_type*>(memory_.addr()) + num_entries_; }
154 const_iterator cend() const { return end(); }
155
156 void dump(std::ostream &out) const override { out << "ArrayIndex<" << typeid(key_type).name() << '>' << std::endl; }
157 void dump() const override { dump(std::cerr); }
158};
159
161template<arithmetic Key>
163{
169
171 {
172 double slope;
173 double intercept;
174
176
177 double operator()(const key_type x) const { return std::fma(slope, static_cast<double>(x), intercept); }
178
182 const std::size_t offset = 0, const double compression_factor = 1.0)
183 {
184 std::size_t n = std::distance(first, last);
185 if (n == 0) return { 0.0, 0.0 };
186 if (n == 1) return { 0.0, static_cast<double>(offset) * compression_factor };
187 double numerator = static_cast<double>(n); // (offset + n) - offset
188 double denominator = static_cast<double>((*(last - 1)).first - (*first).first);
189 double slope = denominator != 0 ? numerator/denominator * compression_factor : 0.0;
190 double intercept = offset * compression_factor - slope * (*first).first;
191 return { slope, intercept };
192 }
193
197 const std::size_t offset = 0, const double compression_factor = 1.0)
198 {
199 std::size_t n = std::distance(first, last);
200 if (n == 0) return { 0.0, 0.0 };
201 if (n == 1) return { 0.0, static_cast<double>(offset) * compression_factor };
202
203 double mean_x = 0.0;
204 double mean_y = 0.0;
205 double c = 0.0;
206 double m2 = 0.0;
207
208 for (std::size_t i = 0; i != n; ++i) {
209 auto x = (*(first + i)).first;
210 std::size_t y = offset + i;
211
212 double dx = x - mean_x;
213 mean_x += dx / (i + 1);
214 mean_y += (y - mean_y) / (i + 1);
215 c += dx * (y - mean_y);
216
217 double dx2 = x - mean_x;
218 m2 += dx * dx2;
219 }
220
221 double cov = c / (n - 1);
222 double var = m2 / (n - 1);
223
224 if (var == 0.f) return { 0.0, mean_y };
225
226 double slope = cov / var * compression_factor;
227 double intercept = mean_y * compression_factor - slope * mean_x;
228 return { slope, intercept };
229 }
230 };
231
232 protected:
233 std::vector<LinearModel> models_;
234
235 public:
237
239 IndexMethod method() const override { return IndexMethod::Rmi; }
240
242 const memory::Memory & memory() const override { throw m::runtime_error("not yet implemented"); }
243
244 /* Returns the size of the index in bytes. */
245 std::size_t size_in_bytes() const override { throw m::runtime_error("not yet implemented"); }
246
248 void finalize() override;
249
253 const_iterator lower_bound(const key_type key) const override {
254 if (not base_type::finalized()) throw m::exception("Index is not finalized.");
256 }
257
261 const_iterator upper_bound(const key_type key) const override {
262 if (not base_type::finalized()) throw m::exception("Index is not finalized.");
264 }
265
266 void dump(std::ostream &out) const override { out << "RecursiveModelIndex<" << typeid(key_type).name() << '>' << std::endl; }
267 void dump() const override { dump(std::cerr); }
268
269 private:
270 std::size_t predict(const key_type key) const {
271 auto segment_id = std::clamp<double>(models_[0](key), 0, models_.size() - 2);
272 auto pred = std::clamp<double>(models_[segment_id + 1](key), 0, base_type::num_entries());
273 return static_cast<std::size_t>(pred);
274 }
276 auto begin = base_type::begin();
277 auto end = base_type::end();
278 std::size_t bound = 1;
279 if (base_type::cmp(*pred, value)) { // search right side
280 auto prev = pred;
281 auto curr = prev + bound;
282 while (curr < end and base_type::cmp(*curr, value)) {
283 bound *= 2;
284 prev = curr;
285 curr += bound;
286 }
287 return std::lower_bound(prev, std::min(curr + 1, end), value, base_type::cmp);
288 } else { // search left side
289 auto prev = pred;
290 auto curr = prev - bound;
291 while (curr > begin and not base_type::cmp(*curr, value)) {
292 bound *= 2;
293 prev = curr;
294 curr -= bound;
295 }
296 return std::lower_bound(std::max(begin, curr), prev, value, base_type::cmp);
297 }
298 }
300 auto begin = base_type::begin();
301 auto end = base_type::end();
302 std::size_t bound = 1;
303 if (not base_type::cmp(value, *pred)) { // search right side
304 auto prev = pred;
305 auto curr = prev + bound;
306 while (curr < end and not base_type::cmp(value, *curr)) {
307 bound *= 2;
308 prev = curr;
309 curr += bound;
310 }
311 return std::upper_bound(prev, std::min(curr + 1, end), value, base_type::cmp);
312 } else { // search left side
313 auto prev = pred;
314 auto curr = prev - bound;
315 while (curr > begin and base_type::cmp(value, *curr)) {
316 bound *= 2;
317 prev = curr;
318 curr -= bound;
319 }
320 return std::upper_bound(std::max(begin, curr), prev, value, base_type::cmp);
321 }
322 }
323};
324
325#define M_INDEX_LIST_TEMPLATED(X) \
326 X(m::idx::ArrayIndex<bool>) \
327 X(m::idx::ArrayIndex<int8_t>) \
328 X(m::idx::ArrayIndex<int16_t>) \
329 X(m::idx::ArrayIndex<int32_t>) \
330 X(m::idx::ArrayIndex<int64_t>) \
331 X(m::idx::ArrayIndex<float>) \
332 X(m::idx::ArrayIndex<double>) \
333 X(m::idx::ArrayIndex<const char*>) \
334 X(m::idx::RecursiveModelIndex<int8_t>) \
335 X(m::idx::RecursiveModelIndex<int16_t>) \
336 X(m::idx::RecursiveModelIndex<int32_t>) \
337 X(m::idx::RecursiveModelIndex<int64_t>) \
338 X(m::idx::RecursiveModelIndex<float>) \
339 X(m::idx::RecursiveModelIndex<double>)
340
341}
342
343}
344
345// explicit instantiation declarations
346#define DECLARE(CLASS) \
347 extern template struct CLASS;
349#undef DECLARE
#define M_INDEX_LIST_TEMPLATED(X)
Definition: Index.hpp:325
#define DECLARE(CLASS)
Definition: Index.hpp:346
#define M_CONSTEXPR_COND(COND, IF_TRUE, IF_FALSE)
Definition: macro.hpp:54
IndexMethod
An enum class that lists all supported index methods.
Definition: Index.hpp:24
‍mutable namespace
Definition: Backend.hpp:10
and
Definition: enum_ops.hpp:12
and arithmetic< U > and same_signedness< T, U > U
Definition: concepts.hpp:90
A Schema represents a sequence of identifiers, optionally with a prefix, and their associated types.
Definition: Schema.hpp:39
A table is a sorted set of attributes.
Definition: Schema.hpp:388
A simple index based on a sorted array that maps keys to their tuple_id.
Definition: Index.hpp:58
const_iterator cend() const
Definition: Index.hpp:154
static constexpr std::size_t ALLOCATION_SIZE
1 GiB
Definition: Index.hpp:60
bool finalized() const
Returns true iff the index is currently finalized.
Definition: Index.hpp:125
const memory::Memory & memory() const override
Returns the underlying memory of the index.
Definition: Index.hpp:108
IndexMethod method() const override
Returns the IndexMethod of the index.
Definition: Index.hpp:105
virtual const_iterator upper_bound(const key_type key) const
Returns an iterator pointing to the first entry of the vector such that entry.key < key is true,...
Definition: Index.hpp:138
std::size_t num_entries() const override
Returns the number of entries in the index.
Definition: Index.hpp:102
const entry_type * const_iterator
Definition: Index.hpp:69
entry_type * iterator
Definition: Index.hpp:68
memory::Memory memory_
‍the underlying memory for a vector holding the index entries consisting of pairs of key and value
Definition: Index.hpp:73
void bulkload(const Table &table, const Schema &key_schema) override
Bulkloads the index from table on the key contained in key_schema by executing a query and adding one...
Definition: Index.cpp:58
const_iterator end() const
Definition: Index.hpp:153
const_iterator cbegin() const
Definition: Index.hpp:148
std::pair< key_type, value_type > entry_type
Definition: Index.hpp:67
iterator begin()
Returns an iterator pointing to the first entry of the index.
Definition: Index.hpp:145
void dump(std::ostream &out) const override
Definition: Index.hpp:156
std::size_t size_in_bytes() const override
Definition: Index.hpp:111
void add(const key_type key, const value_type value)
Adds a single pair of key and value to the index.
Definition: Index.cpp:147
struct m::idx::ArrayIndex::@3 cmp
Custom comparator class to handle the special case of.
key_type rhs
Definition: Index.hpp:85
std::size_t num_entries_
number of entries
Definition: Index.hpp:74
std::size_t value_type
Definition: Index.hpp:66
bool finalized_
flag to signalize whether index is finalized, i.e. array is sorted
Definition: Index.hpp:75
const_iterator begin() const
Definition: Index.hpp:147
virtual const_iterator lower_bound(const key_type key) const
Returns an iterator pointing to the first entry of the vector such that entry.key < key is false,...
Definition: Index.hpp:130
iterator end()
Returns an interator pointing to the first element following the last entry of the index.
Definition: Index.hpp:151
virtual void finalize()
Sorts the underlying vector and flags the index as finalized.
Definition: Index.hpp:119
void dump() const override
Definition: Index.hpp:157
The base class for indexes.
Definition: Index.hpp:28
IndexBase(IndexBase &&)=default
virtual void dump() const =0
virtual const memory::Memory & memory() const =0
Returns the underlying memory of the index.
virtual void dump(std::ostream &out) const =0
virtual std::size_t num_entries() const =0
virtual ~IndexBase()
Definition: Index.hpp:34
static std::string build_query(const Table &table, const Schema &schema)
Constructs a query string to select all attributes in schema from table.
Definition: Index.cpp:40
virtual std::size_t size_in_bytes() const =0
virtual void bulkload(const Table &table, const Schema &key_schema)=0
Bulkloads the index by executing a query on table using key_schema.
virtual IndexMethod method() const =0
Returns the IndexMethod of the index.
IndexBase()=default
LinearModel(double slope, double intercept)
Definition: Index.hpp:175
static LinearModel train_linear_regression(const_iterator first, const_iterator last, const std::size_t offset=0, const double compression_factor=1.0)
Builds a linear regression model from all data points between the first and last .
Definition: Index.hpp:196
static LinearModel train_linear_spline(const_iterator first, const_iterator last, const std::size_t offset=0, const double compression_factor=1.0)
Builds a linear spline model between the first and last data point.
Definition: Index.hpp:181
double operator()(const key_type x) const
Definition: Index.hpp:177
A recursive model index with two layers consiting only of linear monels that maps keys to their tuple...
Definition: Index.hpp:163
std::size_t size_in_bytes() const override
Definition: Index.hpp:245
const_iterator lower_bound(const key_type key) const override
Returns an iterator pointing to the first entry of the vector such that entry.key < key is false,...
Definition: Index.hpp:253
void finalize() override
Sorts the underlying vector, builds the linear models, and flags the index as finalized.
Definition: Index.cpp:163
const memory::Memory & memory() const override
Returns the underlying memory of the index.
Definition: Index.hpp:242
base_type::entry_type entry_type
Definition: Index.hpp:167
base_type::value_type value_type
Definition: Index.hpp:166
base_type::key_type key_type
Definition: Index.hpp:165
std::vector< LinearModel > models_
A vector of linear models to index the underlying data.
Definition: Index.hpp:233
const_iterator lower_bound_exponential_search(const_iterator pred, const key_type value) const
Definition: Index.hpp:275
const_iterator upper_bound(const key_type key) const override
Returns an iterator pointing to the first entry of the vector such that entry.key < key is true,...
Definition: Index.hpp:261
void dump(std::ostream &out) const override
Definition: Index.hpp:266
const_iterator upper_bound_exponential_search(const_iterator pred, const key_type value) const
Definition: Index.hpp:299
base_type::const_iterator const_iterator
Definition: Index.hpp:168
std::size_t predict(const key_type key) const
Definition: Index.hpp:270
IndexMethod method() const override
Returns the IndexMethod of the index.
Definition: Index.hpp:239
void dump() const override
Definition: Index.hpp:267
Represents a mapping created by a memory::Allocator.
Definition: memory.hpp:78
void * addr() const
Returns a pointer to the beginning of the virtual address space where this allocation is mapped to.
Definition: memory.hpp:109
Signals a runtime error that mu*t*able is not responsible for and that mu*t*able was not able to reco...
Definition: exception.hpp:49