mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
Index.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <algorithm>
4#include <cmath>
5#include <cstring>
6#include <iostream>
10#include <utility>
11#include <vector>
12
13
14namespace m {
15
16// Forward declarations
17struct Table;
18struct Schema;
19
20namespace idx {
21
23enum class IndexMethod { Array, Rmi };
24
27{
28 protected:
29 IndexBase() = default;
30
31 public:
32 IndexBase(IndexBase&&) = default;
33 virtual ~IndexBase() { }
34
36 virtual void bulkload(const Table &table, const Schema &key_schema) = 0;
37 /* Returns the number of entries in the index. */
38 virtual std::size_t num_entries() const = 0;
40 virtual IndexMethod method() const = 0;
41
42 virtual void dump(std::ostream &out) const = 0;
43 virtual void dump() const = 0;
44
45 protected:
47 static std::string build_query(const Table &table, const Schema &schema);
48};
49
51template<typename Key>
53{
54 using key_type = Key;
55 using value_type = std::size_t;
56 using entry_type = std::pair<key_type, value_type>;
57 using container_type = std::vector<entry_type>;
58 using const_iterator = typename container_type::const_iterator;
59
60 protected:
63
65 struct
66 {
67 template<typename T, typename U>
68 requires (std::same_as<T, key_type> or std::same_as<T, entry_type>) and
69 (std::same_as<U, key_type> or std::same_as<U, entry_type>)
70 bool operator()(const T &_lhs, const U &_rhs) const {
71 key_type lhs = M_CONSTEXPR_COND((std::same_as<T, key_type>), _lhs, _lhs.first);
72 key_type rhs = M_CONSTEXPR_COND((std::same_as<U, key_type>), _rhs, _rhs.first);
73 if constexpr(std::same_as<key_type, const char*>)
74 return std::strcmp(lhs, rhs) < 0;
75 else
76 return lhs < rhs;
77 }
78 } cmp;
79
80 public:
81 ArrayIndex() : finalized_(false) { }
82
86 void bulkload(const Table &table, const Schema &key_schema) override;
87
89 std::size_t num_entries() const override { return data_.size(); }
90
92 IndexMethod method() const override { return IndexMethod::Array; }
93
96 void add(const key_type key, const value_type value);
97
99 virtual void finalize() {
100 std::sort(data_.begin(), data_.end(), cmp);
101 finalized_ = true;
102 }
103
105 bool finalized() const { return finalized_; }
106
110 virtual const_iterator lower_bound(const key_type key) const {
111 if (not finalized_) throw m::exception("Index is not finalized.");
112 return std::lower_bound(data_.begin(), data_.end(), entry_type{key, value_type()}, cmp);
113 }
114
118 virtual const_iterator upper_bound(const key_type key) const {
119 if (not finalized_) throw m::exception("Index is not finalized.");
120 return std::upper_bound(data_.begin(), data_.end(), entry_type{key, value_type()}, cmp);
121 }
122
124 const_iterator begin() const { return data_.cbegin(); }
125 const_iterator cbegin() const { return data_.cbegin(); }
127 const_iterator end() const { return data_.cend(); }
128 const_iterator cend() const { return data_.cend(); }
129
130 void dump(std::ostream &out) const override { out << "ArrayIndex<" << typeid(key_type).name() << '>' << std::endl; }
131 void dump() const override { dump(std::cerr); }
132};
133
135template<arithmetic Key>
137{
144
146 {
147 double slope;
148 double intercept;
149
151
152 double operator()(const key_type x) const { return std::fma(slope, static_cast<double>(x), intercept); }
153
157 const std::size_t offset = 0, const double compression_factor = 1.0)
158 {
159 std::size_t n = std::distance(first, last);
160 if (n == 0) return { 0.0, 0.0 };
161 if (n == 1) return { 0.0, static_cast<double>(offset) * compression_factor };
162 double numerator = static_cast<double>(n); // (offset + n) - offset
163 double denominator = static_cast<double>((*(last - 1)).first - (*first).first);
164 double slope = denominator != 0 ? numerator/denominator * compression_factor : 0.0;
165 double intercept = offset * compression_factor - slope * (*first).first;
166 return { slope, intercept };
167 }
168
172 const std::size_t offset = 0, const double compression_factor = 1.0)
173 {
174 std::size_t n = std::distance(first, last);
175 if (n == 0) return { 0.0, 0.0 };
176 if (n == 1) return { 0.0, static_cast<double>(offset) * compression_factor };
177
178 double mean_x = 0.0;
179 double mean_y = 0.0;
180 double c = 0.0;
181 double m2 = 0.0;
182
183 for (std::size_t i = 0; i != n; ++i) {
184 auto x = (*(first + i)).first;
185 std::size_t y = offset + i;
186
187 double dx = x - mean_x;
188 mean_x += dx / (i + 1);
189 mean_y += (y - mean_y) / (i + 1);
190 c += dx * (y - mean_y);
191
192 double dx2 = x - mean_x;
193 m2 += dx * dx2;
194 }
195
196 double cov = c / (n - 1);
197 double var = m2 / (n - 1);
198
199 if (var == 0.f) return { 0.0, mean_y };
200
201 double slope = cov / var * compression_factor;
202 double intercept = mean_y * compression_factor - slope * mean_x;
203 return { slope, intercept };
204 }
205 };
206
207 protected:
208 std::vector<LinearModel> models_;
209
210 public:
212
214 IndexMethod method() const override { return IndexMethod::Rmi; }
215
217 void finalize() override;
218
222 const_iterator lower_bound(const key_type key) const override {
223 if (not base_type::finalized()) throw m::exception("Index is not finalized.");
225 }
226
230 const_iterator upper_bound(const key_type key) const override {
231 if (not base_type::finalized()) throw m::exception("Index is not finalized.");
233 }
234
235 void dump(std::ostream &out) const override { out << "RecursiveModelIndex<" << typeid(key_type).name() << '>' << std::endl; }
236 void dump() const override { dump(std::cerr); }
237
238 private:
239 std::size_t predict(const key_type key) const {
240 auto segment_id = std::clamp<double>(models_[0](key), 0, models_.size() - 2);
241 auto pred = std::clamp<double>(models_[segment_id + 1](key), 0, base_type::data_.size());
242 return static_cast<std::size_t>(pred);
243 }
245 auto begin = base_type::begin();
246 auto end = base_type::end();
247 std::size_t bound = 1;
248 if (base_type::cmp(*pred, value)) { // search right side
249 auto prev = pred;
250 auto curr = prev + bound;
251 while (curr < end and base_type::cmp(*curr, value)) {
252 bound *= 2;
253 prev = curr;
254 curr += bound;
255 }
256 return std::lower_bound(prev, std::min(curr + 1, end), value, base_type::cmp);
257 } else { // search left side
258 auto prev = pred;
259 auto curr = prev - bound;
260 while (curr > begin and not base_type::cmp(*curr, value)) {
261 bound *= 2;
262 prev = curr;
263 curr -= bound;
264 }
265 return std::lower_bound(std::max(begin, curr), prev, value, base_type::cmp);
266 }
267 }
269 auto begin = base_type::begin();
270 auto end = base_type::end();
271 std::size_t bound = 1;
272 if (not base_type::cmp(value, *pred)) { // search right side
273 auto prev = pred;
274 auto curr = prev + bound;
275 while (curr < end and not base_type::cmp(value, *curr)) {
276 bound *= 2;
277 prev = curr;
278 curr += bound;
279 }
280 return std::upper_bound(prev, std::min(curr + 1, end), value, base_type::cmp);
281 } else { // search left side
282 auto prev = pred;
283 auto curr = prev - bound;
284 while (curr > begin and base_type::cmp(value, *curr)) {
285 bound *= 2;
286 prev = curr;
287 curr -= bound;
288 }
289 return std::upper_bound(std::max(begin, curr), prev, value, base_type::cmp);
290 }
291 }
292};
293
294#define M_INDEX_LIST_TEMPLATED(X) \
295 X(m::idx::ArrayIndex<bool>) \
296 X(m::idx::ArrayIndex<int8_t>) \
297 X(m::idx::ArrayIndex<int16_t>) \
298 X(m::idx::ArrayIndex<int32_t>) \
299 X(m::idx::ArrayIndex<int64_t>) \
300 X(m::idx::ArrayIndex<float>) \
301 X(m::idx::ArrayIndex<double>) \
302 X(m::idx::ArrayIndex<const char*>) \
303 X(m::idx::RecursiveModelIndex<int8_t>) \
304 X(m::idx::RecursiveModelIndex<int16_t>) \
305 X(m::idx::RecursiveModelIndex<int32_t>) \
306 X(m::idx::RecursiveModelIndex<int64_t>) \
307 X(m::idx::RecursiveModelIndex<float>) \
308 X(m::idx::RecursiveModelIndex<double>)
309
310}
311
312}
313
314// explicit instantiation declarations
315#define DECLARE(CLASS) \
316 extern template struct CLASS;
318#undef DECLARE
#define M_INDEX_LIST_TEMPLATED(X)
Definition: Index.hpp:294
#define DECLARE(CLASS)
Definition: Index.hpp:315
#define M_CONSTEXPR_COND(COND, IF_TRUE, IF_FALSE)
Definition: macro.hpp:54
IndexMethod
An enum class that lists all supported index methods.
Definition: Index.hpp:23
‍mutable namespace
Definition: Backend.hpp:10
and
Definition: enum_ops.hpp:12
and arithmetic< U > and same_signedness< T, U > U
Definition: concepts.hpp:90
A Schema represents a sequence of identifiers, optionally with a prefix, and their associated types.
Definition: Schema.hpp:39
A table is a sorted set of attributes.
Definition: Schema.hpp:388
A simple index based on a sorted array that maps keys to their tuple_id.
Definition: Index.hpp:53
const_iterator cend() const
Definition: Index.hpp:128
bool finalized() const
Returns true iff the index is currently finalized.
Definition: Index.hpp:105
IndexMethod method() const override
Returns the IndexMethod of the index.
Definition: Index.hpp:92
virtual const_iterator upper_bound(const key_type key) const
Returns an iterator pointing to the first entry of the vector such that entry.key < key is true,...
Definition: Index.hpp:118
std::size_t num_entries() const override
Returns the number of entries in the index.
Definition: Index.hpp:89
std::vector< entry_type > container_type
Definition: Index.hpp:57
void bulkload(const Table &table, const Schema &key_schema) override
Bulkloads the index from table on the key contained in key_schema by executing a query and adding one...
Definition: Index.cpp:53
const_iterator end() const
Returns an interator pointing to the first element following the last entry of the index.
Definition: Index.hpp:127
const_iterator cbegin() const
Definition: Index.hpp:125
std::pair< key_type, value_type > entry_type
Definition: Index.hpp:56
void dump(std::ostream &out) const override
Definition: Index.hpp:130
void add(const key_type key, const value_type value)
Adds a single pair of key and value to the index.
Definition: Index.cpp:142
struct m::idx::ArrayIndex::@3 cmp
Custom comparator class to handle the special case of.
key_type rhs
Definition: Index.hpp:72
std::size_t value_type
Definition: Index.hpp:55
bool finalized_
flag to signalize whether index is finalized, i.e. array is sorted
Definition: Index.hpp:62
const_iterator begin() const
Returns an iterator pointing to the first entry of the index.
Definition: Index.hpp:124
virtual const_iterator lower_bound(const key_type key) const
Returns an iterator pointing to the first entry of the vector such that entry.key < key is false,...
Definition: Index.hpp:110
typename container_type::const_iterator const_iterator
Definition: Index.hpp:58
container_type data_
A vector holding the index entries consisting of pairs of key and value.
Definition: Index.hpp:61
virtual void finalize()
Sorts the underlying vector and flags the index as finalized.
Definition: Index.hpp:99
void dump() const override
Definition: Index.hpp:131
The base class for indexes.
Definition: Index.hpp:27
IndexBase(IndexBase &&)=default
virtual void dump() const =0
virtual void dump(std::ostream &out) const =0
virtual std::size_t num_entries() const =0
virtual ~IndexBase()
Definition: Index.hpp:33
static std::string build_query(const Table &table, const Schema &schema)
Constructs a query string to select all attributes in schema from table.
Definition: Index.cpp:40
virtual void bulkload(const Table &table, const Schema &key_schema)=0
Bulkloads the index by executing a query on table using key_schema.
virtual IndexMethod method() const =0
Returns the IndexMethod of the index.
IndexBase()=default
LinearModel(double slope, double intercept)
Definition: Index.hpp:150
static LinearModel train_linear_regression(const_iterator first, const_iterator last, const std::size_t offset=0, const double compression_factor=1.0)
Builds a linear regression model from all data points between the first and last .
Definition: Index.hpp:171
static LinearModel train_linear_spline(const_iterator first, const_iterator last, const std::size_t offset=0, const double compression_factor=1.0)
Builds a linear spline model between the first and last data point.
Definition: Index.hpp:156
double operator()(const key_type x) const
Definition: Index.hpp:152
A recursive model index with two layers consiting only of linear monels that maps keys to their tuple...
Definition: Index.hpp:137
const_iterator lower_bound(const key_type key) const override
Returns an iterator pointing to the first entry of the vector such that entry.key < key is false,...
Definition: Index.hpp:222
void finalize() override
Sorts the underlying vector, builds the linear models, and flags the index as finalized.
Definition: Index.cpp:154
base_type::entry_type entry_type
Definition: Index.hpp:141
base_type::value_type value_type
Definition: Index.hpp:140
base_type::key_type key_type
Definition: Index.hpp:139
std::vector< LinearModel > models_
A vector of linear models to index the underlying data.
Definition: Index.hpp:208
const_iterator lower_bound_exponential_search(const_iterator pred, const key_type value) const
Definition: Index.hpp:244
const_iterator upper_bound(const key_type key) const override
Returns an iterator pointing to the first entry of the vector such that entry.key < key is true,...
Definition: Index.hpp:230
void dump(std::ostream &out) const override
Definition: Index.hpp:235
const_iterator upper_bound_exponential_search(const_iterator pred, const key_type value) const
Definition: Index.hpp:268
base_type::const_iterator const_iterator
Definition: Index.hpp:143
std::size_t predict(const key_type key) const
Definition: Index.hpp:239
IndexMethod method() const override
Returns the IndexMethod of the index.
Definition: Index.hpp:214
void dump() const override
Definition: Index.hpp:236
base_type::container_type container_type
Definition: Index.hpp:142