mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
Index.cpp
Go to the documentation of this file.
2
5#include <mutable/mutable.hpp>
6#include <mutable/Options.hpp>
8#include <sstream>
9
10
11using namespace m;
12using namespace m::idx;
13
14namespace {
15
16namespace options {
17
19double rmi_model_entry_ratio = 0.01;
20
21}
22
23__attribute__((constructor(201)))
24static void add_index_args()
25{
26 Catalog &C = Catalog::Get();
27
28 /*----- Command-line arguments -----*/
29 C.arg_parser().add<double>(
30 /* group= */ "Index",
31 /* short= */ nullptr,
32 /* long= */ "--rmi-model-entry-ratio",
33 /* description= */ "specify the ratio of linear models to index entries for recursive model indexes",
34 /* callback= */ [](double rmi_model_entry_ratio){ options::rmi_model_entry_ratio = rmi_model_entry_ratio; }
35 );
36}
37
38}
39
40std::string IndexBase::build_query(const Table &table, const Schema &schema)
41{
42 std::ostringstream oss;
43 oss << "SELECT ";
44 for (std::size_t i = 0; i != schema.num_entries(); ++i) {
45 if (i != 0) oss << ", ";
46 oss << schema.at(i).id;
47 }
48 oss << " FROM " << table.name() << ';';
49 return oss.str();
50}
51
52template<typename Key>
53void ArrayIndex<Key>::bulkload(const Table &table, const Schema &key_schema)
54{
55 /* XXX: Disable timer during execution to not print times for query that is performed as part of bulkloading. */
56 const auto &old_timer = std::exchange(Catalog::Get().timer(), Timer());
57
58 /* Check that key schema contains a single entry. */
59 if (key_schema.num_entries() != 1)
60 throw invalid_argument("Key schema should contain exactly one entry.");
61 auto entry = key_schema.at(0);
62
63 /* Check that key type and attribute type match. */
64 auto attribute_type = entry.type;
65#define CHECK(TYPE) \
66 if constexpr(not std::same_as<key_type, TYPE>) \
67 throw invalid_argument("Key type and attribute type do not match."); \
68 return
69
71 [](const Boolean&) { CHECK(bool); },
72 [](const Numeric &n) {
73 switch (n.kind) {
74 case Numeric::N_Int:
75 case Numeric::N_Decimal:
76 switch (n.size()) {
77 default: M_unreachable("invalid size");
78 case 8: CHECK(int8_t);
79 case 16: CHECK(int16_t);
80 case 32: CHECK(int32_t);
81 case 64: CHECK(int64_t);
82 }
83 case Numeric::N_Float:
84 switch (n.size()) {
85 default: M_unreachable("invalid size");
86 case 32: CHECK(float);
87 case 64: CHECK(double);
88 }
89 }
90 },
91 [](const CharacterSequence&) { CHECK(const char*); },
92 [](const Date&) { CHECK(int32_t); },
93 [](const DateTime&) { CHECK(int64_t); },
94 [](auto&&) { M_unreachable("invalid type"); },
95 }, *attribute_type);
96#undef CHECk
97
98 /* Build the query to retrieve keys. */
99 auto query = build_query(table, key_schema);
100
101 /* Create the diagnostics object. */
102 Diagnostic diag(Options::Get().has_color, std::cout, std::cerr);
103
104 /* Compute statement from query string. */
105 auto stmt = statement_from_string(diag, query);
106
107 /* Define get function based on key_type. */
108 std::function<key_type(const Tuple&)> fn_get;
109 if constexpr(integral<key_type>)
110 fn_get = [](const Tuple &t) { return static_cast<key_type>(t.get(0).as<int64_t>()); };
111 else // bool, float, double, const char*
112 fn_get = [](const Tuple &t) { return t.get(0).as<key_type>(); };
113
114 /* Define callback operator to add keys to index. */
115 std::size_t tuple_id = 0;
116 auto fn_add = [&](const Schema&, const Tuple &tuple) {
117 if (not tuple.is_null(0))
118 this->add(fn_get(tuple), tuple_id);
119 tuple_id++;
120 };
121 auto consumer = std::make_unique<CallbackOperator>(fn_add);
122
123 /* Create backend on which the query is exectued. We are using the `Interpreter` as the `wasm::Scan` might have
124 * been deactivated via CLI which results in not finding a plan for the query.
125 * TODO: We plan on using the default `Backend` set in the `Catalog` with temporarily adjusting the options to make
126 * sure `wasm::Scan` is available. */
127 static thread_local std::unique_ptr<Backend> backend;
128 if (not backend)
129 backend = Catalog::Get().create_backend(Catalog::Get().pool("Interpreter"));
130
131 /* Execute query to insert tuples. */
132 m::execute_query(diag, as<ast::SelectStmt>(*stmt), std::move(consumer), *backend);
133
134 /* Finalize index. */
135 finalize();
136
137 /* XXX: Reenable timer. */
138 std::exchange(Catalog::Get().timer(), std::move(old_timer));
139}
140
141template<typename Key>
142void ArrayIndex<Key>::add(const key_type key, const value_type value)
143{
144 if constexpr(std::same_as<key_type, const char*>) {
145 Catalog &C = Catalog::Get();
146 data_.emplace_back(C.pool(key), value);
147 } else {
148 data_.emplace_back(key, value);
149 }
150 finalized_ = false;
151}
152
153template<arithmetic Key>
155{
156 /* Sort data. */
157 std::sort(base_type::data_.begin(), base_type::data_.end(), base_type::cmp);
158
159 /* Compute number of models. */
160 auto begin = base_type::begin();
161 auto end = base_type::end();
162 std::size_t n_keys = std::distance(begin, end);
163 std::size_t n_models = std::max<std::size_t>(1, n_keys * options::rmi_model_entry_ratio);
164 models_.reserve(n_models + 1);
165
166 /* Train first layer. */
167 models_.emplace_back(
168 LinearModel::train_linear_spline(
169 /* begin= */ begin,
170 /* end= */ end,
171 /* offset= */ 0,
172 /* compression_factor= */ static_cast<double>(n_models) / n_keys
173 )
174 );
175
176 /* Train second layer. */
177 auto get_segment_id = [&](entry_type e) { return std::clamp<double>(models_[0](e.first), 0, n_models - 1); };
178 std::size_t segment_start = 0;
179 std::size_t segment_id = 0;
180 for (std::size_t i = 0; i != n_keys; ++i) {
181 auto pos = begin + i;
182 std::size_t pred_segment_id = get_segment_id(*pos);
183 if (pred_segment_id > segment_id) {
184 models_.emplace_back(
185 LinearModel::train_linear_regression(
186 /* begin= */ begin + segment_start,
187 /* end= */ pos,
188 /* offset= */ segment_start
189 )
190 );
191 for (std::size_t j = segment_id + 1; j < pred_segment_id; ++j) {
192 models_.emplace_back(
193 LinearModel::train_linear_regression(
194 /* begin= */ pos - 1,
195 /* end= */ pos,
196 /* offset= */ i - 1
197 )
198 );
199 }
200 segment_id = pred_segment_id;
201 segment_start = i;
202
203 }
204 }
205 models_.emplace_back(
206 LinearModel::train_linear_regression(
207 /* begin= */ begin + segment_start,
208 /* end= */ end,
209 /* offset= */ segment_start
210 )
211 );
212 for (std::size_t j = segment_id + 1; j < n_models; ++j) {
213 models_.emplace_back(
214 LinearModel::train_linear_regression(
215 /* begin= */ end - 1,
216 /* end= */ end,
217 /* offset= */ n_keys - 1
218 )
219 );
220 }
221
222 /* Mark index as finalized. */
223 base_type::finalized_ = true;
224};
225
226// explicit instantiations to prevent linker errors
227#define INSTANTIATE(CLASS) \
228 template struct CLASS;
230#undef INSTANTIATE
#define CHECK(TYPE)
#define M_INDEX_LIST_TEMPLATED(X)
Definition: Index.hpp:294
__attribute__((constructor(202))) static void register_interpreter()
#define INSTANTIATE(CLASS)
void add(const char *group_name, const char *short_name, const char *long_name, const char *description, Callback &&callback)
Adds a new group option to the ArgParser.
Definition: ArgParser.hpp:84
Check whether.
Definition: concepts.hpp:23
#define M_unreachable(MSG)
Definition: macro.hpp:146
Definition: Index.hpp:20
‍mutable namespace
Definition: Backend.hpp:10
std::unique_ptr< ast::Stmt > M_EXPORT statement_from_string(Diagnostic &diag, const std::string &str)
Use lexer, parser, and semantic analysis to create a Stmt from str.
Definition: mutable.cpp:25
void M_EXPORT execute_query(Diagnostic &diag, const ast::SelectStmt &stmt, std::unique_ptr< Consumer > consumer)
Optimizes and executes the given SelectStmt.
Definition: mutable.cpp:368
auto visit(Callable &&callable, Base &obj, m::tag< Callable > &&=m::tag< Callable >())
Generic implementation to visit a class hierarchy, with similar syntax as std::visit.
Definition: Visitor.hpp:138
‍command-line options for the HeuristicSearchPlanEnumerator
Definition: V8Engine.cpp:44
The boolean type.
Definition: Type.hpp:230
The catalog contains all Databases and keeps track of all meta information of the database system.
Definition: Catalog.hpp:215
ThreadSafePooledString pool(const char *str) const
Creates an internalized copy of the string str by adding it to the internal StringPool.
Definition: Catalog.hpp:274
static Catalog & Get()
Return a reference to the single Catalog instance.
std::unique_ptr< Backend > create_backend() const
Returns a new Backend.
Definition: Catalog.hpp:477
m::ArgParser & arg_parser()
Definition: Catalog.hpp:253
The type of character strings, both fixed length and varying length.
Definition: Type.hpp:290
The date type.
Definition: Type.hpp:364
The date type.
Definition: Type.hpp:335
The numeric type represents integer and floating-point types of different precision and scale.
Definition: Type.hpp:393
static Options & Get()
Return a reference to the single Options instance.
Definition: Options.cpp:9
const Type * type
Definition: Schema.hpp:87
A Schema represents a sequence of identifiers, optionally with a prefix, and their associated types.
Definition: Schema.hpp:39
std::size_t num_entries() const
Returns the number of entries in this Schema.
Definition: Schema.hpp:124
entry_type & at(std::size_t idx)
Returns the entry at index idx with in-bounds checking.
Definition: Schema.hpp:143
A table is a sorted set of attributes.
Definition: Schema.hpp:388
virtual const ThreadSafePooledString & name() const =0
Returns the name of the Table.
Collect timings of events.
Definition: Timer.hpp:17
Value & get(std::size_t idx)
Returns a reference to the Value at index idx.
Definition: Tuple.hpp:263
std::conditional_t< std::is_pointer_v< T >, T, T & > as()
Returns a reference to the value interpreted as of type T.
Definition: Tuple.hpp:79
void bulkload(const Table &table, const Schema &key_schema) override
Bulkloads the index from table on the key contained in key_schema by executing a query and adding one...
Definition: Index.cpp:53
void add(const key_type key, const value_type value)
Adds a single pair of key and value to the index.
Definition: Index.cpp:142
std::size_t value_type
Definition: Index.hpp:55
static std::string build_query(const Table &table, const Schema &schema)
Constructs a query string to select all attributes in schema from table.
Definition: Index.cpp:40
void finalize() override
Sorts the underlying vector, builds the linear models, and flags the index as finalized.
Definition: Index.cpp:154
base_type::entry_type entry_type
Definition: Index.hpp:141
Signals that an argument to a function of method was invalid.
Definition: exception.hpp:37