mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
Index.cpp
Go to the documentation of this file.
2
5#include <mutable/mutable.hpp>
6#include <mutable/Options.hpp>
8#include <sstream>
9
10
11using namespace m;
12using namespace m::idx;
13
14namespace {
15
16namespace options {
17
19double rmi_model_entry_ratio = 0.01;
20
21}
22
23__attribute__((constructor(201)))
24static void add_index_args()
25{
26 Catalog &C = Catalog::Get();
27
28 /*----- Command-line arguments -----*/
29 C.arg_parser().add<double>(
30 /* group= */ "Index",
31 /* short= */ nullptr,
32 /* long= */ "--rmi-model-entry-ratio",
33 /* description= */ "specify the ratio of linear models to index entries for recursive model indexes",
34 /* callback= */ [](double rmi_model_entry_ratio){ options::rmi_model_entry_ratio = rmi_model_entry_ratio; }
35 );
36}
37
38}
39
40std::string IndexBase::build_query(const Table &table, const Schema &schema)
41{
42 std::ostringstream oss;
43 oss << "SELECT ";
44 for (std::size_t i = 0; i != schema.num_entries(); ++i) {
45 if (i != 0) oss << ", ";
46 oss << schema.at(i).id;
47 }
48 oss << " FROM " << table.name() << ';';
49 return oss.str();
50}
51
52template<typename Key>
54 : memory_(Catalog::Get().allocator().allocate(ALLOCATION_SIZE)), num_entries_(0), finalized_(false)
55{ }
56
57template<typename Key>
58void ArrayIndex<Key>::bulkload(const Table &table, const Schema &key_schema)
59{
60 /* XXX: Disable timer during execution to not print times for query that is performed as part of bulkloading. */
61 const auto &old_timer = std::exchange(Catalog::Get().timer(), Timer());
62
63 /* Check that key schema contains a single entry. */
64 if (key_schema.num_entries() != 1)
65 throw invalid_argument("Key schema should contain exactly one entry.");
66 auto entry = key_schema.at(0);
67
68 /* Check that key type and attribute type match. */
69 auto attribute_type = entry.type;
70#define CHECK(TYPE) \
71 if constexpr(not std::same_as<key_type, TYPE>) \
72 throw invalid_argument("Key type and attribute type do not match."); \
73 return
74
76 [](const Boolean&) { CHECK(bool); },
77 [](const Numeric &n) {
78 switch (n.kind) {
79 case Numeric::N_Int:
80 case Numeric::N_Decimal:
81 switch (n.size()) {
82 default: M_unreachable("invalid size");
83 case 8: CHECK(int8_t);
84 case 16: CHECK(int16_t);
85 case 32: CHECK(int32_t);
86 case 64: CHECK(int64_t);
87 }
88 case Numeric::N_Float:
89 switch (n.size()) {
90 default: M_unreachable("invalid size");
91 case 32: CHECK(float);
92 case 64: CHECK(double);
93 }
94 }
95 },
96 [](const CharacterSequence&) { CHECK(const char*); },
97 [](const Date&) { CHECK(int32_t); },
98 [](const DateTime&) { CHECK(int64_t); },
99 [](auto&&) { M_unreachable("invalid type"); },
100 }, *attribute_type);
101#undef CHECk
102
103 /* Build the query to retrieve keys. */
104 auto query = build_query(table, key_schema);
105
106 /* Create the diagnostics object. */
107 Diagnostic diag(Options::Get().has_color, std::cout, std::cerr);
108
109 /* Compute statement from query string. */
110 auto stmt = statement_from_string(diag, query);
111
112 /* Define get function based on key_type. */
113 std::function<key_type(const Tuple&)> fn_get;
114 if constexpr(integral<key_type>)
115 fn_get = [](const Tuple &t) { return static_cast<key_type>(t.get(0).as<int64_t>()); };
116 else // bool, float, double, const char*
117 fn_get = [](const Tuple &t) { return t.get(0).as<key_type>(); };
118
119 /* Define callback operator to add keys to index. */
120 std::size_t tuple_id = 0;
121 auto fn_add = [&](const Schema&, const Tuple &tuple) {
122 if (not tuple.is_null(0))
123 this->add(fn_get(tuple), tuple_id);
124 tuple_id++;
125 };
126 auto consumer = std::make_unique<CallbackOperator>(fn_add);
127
128 /* Create backend on which the query is exectued. We are using the `Interpreter` as the `wasm::Scan` might have
129 * been deactivated via CLI which results in not finding a plan for the query.
130 * TODO: We plan on using the default `Backend` set in the `Catalog` with temporarily adjusting the options to make
131 * sure `wasm::Scan` is available. */
132 static thread_local std::unique_ptr<Backend> backend;
133 if (not backend)
134 backend = Catalog::Get().create_backend(Catalog::Get().pool("Interpreter"));
135
136 /* Execute query to insert tuples. */
137 m::execute_query(diag, as<ast::SelectStmt>(*stmt), std::move(consumer), *backend);
138
139 /* Finalize index. */
140 finalize();
141
142 /* XXX: Reenable timer. */
143 std::exchange(Catalog::Get().timer(), std::move(old_timer));
144}
145
146template<typename Key>
147void ArrayIndex<Key>::add(const key_type key, const value_type value)
148{
149 if ((num_entries_ + 1) * sizeof(entry_type) > memory_.size())
150 throw m::runtime_error("not enough memory allocated to add another entry");
151
152 if constexpr(std::same_as<key_type, const char*>) {
153 Catalog &C = Catalog::Get();
154 *end() = entry_type(C.pool(key), value);
155 } else {
156 *end() = entry_type(key, value);
157 }
158 ++num_entries_;
159 finalized_ = false;
160}
161
162template<arithmetic Key>
164{
165 /* Sort data. */
166 std::sort(base_type::begin(), base_type::end(), base_type::cmp);
167
168 /* Compute number of models. */
169 auto begin = base_type::begin();
170 auto end = base_type::end();
171 std::size_t n_keys = std::distance(begin, end);
172 std::size_t n_models = std::max<std::size_t>(1, n_keys * options::rmi_model_entry_ratio);
173 models_.reserve(n_models + 1);
174
175 /* Train first layer. */
176 models_.emplace_back(
177 LinearModel::train_linear_spline(
178 /* begin= */ begin,
179 /* end= */ end,
180 /* offset= */ 0,
181 /* compression_factor= */ static_cast<double>(n_models) / n_keys
182 )
183 );
184
185 /* Train second layer. */
186 auto get_segment_id = [&](entry_type e) { return std::clamp<double>(models_[0](e.first), 0, n_models - 1); };
187 std::size_t segment_start = 0;
188 std::size_t segment_id = 0;
189 for (std::size_t i = 0; i != n_keys; ++i) {
190 auto pos = begin + i;
191 std::size_t pred_segment_id = get_segment_id(*pos);
192 if (pred_segment_id > segment_id) {
193 models_.emplace_back(
194 LinearModel::train_linear_regression(
195 /* begin= */ begin + segment_start,
196 /* end= */ pos,
197 /* offset= */ segment_start
198 )
199 );
200 for (std::size_t j = segment_id + 1; j < pred_segment_id; ++j) {
201 models_.emplace_back(
202 LinearModel::train_linear_regression(
203 /* begin= */ pos - 1,
204 /* end= */ pos,
205 /* offset= */ i - 1
206 )
207 );
208 }
209 segment_id = pred_segment_id;
210 segment_start = i;
211
212 }
213 }
214 models_.emplace_back(
215 LinearModel::train_linear_regression(
216 /* begin= */ begin + segment_start,
217 /* end= */ end,
218 /* offset= */ segment_start
219 )
220 );
221 for (std::size_t j = segment_id + 1; j < n_models; ++j) {
222 models_.emplace_back(
223 LinearModel::train_linear_regression(
224 /* begin= */ end - 1,
225 /* end= */ end,
226 /* offset= */ n_keys - 1
227 )
228 );
229 }
230
231 /* Mark index as finalized. */
232 base_type::finalized_ = true;
233};
234
235// explicit instantiations to prevent linker errors
236#define INSTANTIATE(CLASS) \
237 template struct CLASS;
239#undef INSTANTIATE
#define CHECK(TYPE)
#define M_INDEX_LIST_TEMPLATED(X)
Definition: Index.hpp:325
__attribute__((constructor(202))) static void register_interpreter()
#define INSTANTIATE(CLASS)
void add(const char *group_name, const char *short_name, const char *long_name, const char *description, Callback &&callback)
Adds a new group option to the ArgParser.
Definition: ArgParser.hpp:84
Check whether.
Definition: concepts.hpp:23
#define M_unreachable(MSG)
Definition: macro.hpp:146
Definition: Index.hpp:21
‍mutable namespace
Definition: Backend.hpp:10
std::unique_ptr< ast::Stmt > M_EXPORT statement_from_string(Diagnostic &diag, const std::string &str)
Use lexer, parser, and semantic analysis to create a Stmt from str.
Definition: mutable.cpp:25
void M_EXPORT execute_query(Diagnostic &diag, const ast::SelectStmt &stmt, std::unique_ptr< Consumer > consumer)
Optimizes and executes the given SelectStmt.
Definition: mutable.cpp:368
auto visit(Callable &&callable, Base &obj, m::tag< Callable > &&=m::tag< Callable >())
Generic implementation to visit a class hierarchy, with similar syntax as std::visit.
Definition: Visitor.hpp:138
‍command-line options for the HeuristicSearchPlanEnumerator
Definition: V8Engine.cpp:44
The boolean type.
Definition: Type.hpp:230
The catalog contains all Databases and keeps track of all meta information of the database system.
Definition: Catalog.hpp:215
ThreadSafePooledString pool(const char *str) const
Creates an internalized copy of the string str by adding it to the internal StringPool.
Definition: Catalog.hpp:274
static Catalog & Get()
Return a reference to the single Catalog instance.
std::unique_ptr< Backend > create_backend() const
Returns a new Backend.
Definition: Catalog.hpp:477
m::ArgParser & arg_parser()
Definition: Catalog.hpp:253
The type of character strings, both fixed length and varying length.
Definition: Type.hpp:290
The date type.
Definition: Type.hpp:364
The date type.
Definition: Type.hpp:335
The numeric type represents integer and floating-point types of different precision and scale.
Definition: Type.hpp:393
static Options & Get()
Return a reference to the single Options instance.
Definition: Options.cpp:9
const Type * type
Definition: Schema.hpp:87
A Schema represents a sequence of identifiers, optionally with a prefix, and their associated types.
Definition: Schema.hpp:39
std::size_t num_entries() const
Returns the number of entries in this Schema.
Definition: Schema.hpp:124
entry_type & at(std::size_t idx)
Returns the entry at index idx with in-bounds checking.
Definition: Schema.hpp:143
A table is a sorted set of attributes.
Definition: Schema.hpp:388
virtual const ThreadSafePooledString & name() const =0
Returns the name of the Table.
Collect timings of events.
Definition: Timer.hpp:17
Value & get(std::size_t idx)
Returns a reference to the Value at index idx.
Definition: Tuple.hpp:263
std::conditional_t< std::is_pointer_v< T >, T, T & > as()
Returns a reference to the value interpreted as of type T.
Definition: Tuple.hpp:79
void bulkload(const Table &table, const Schema &key_schema) override
Bulkloads the index from table on the key contained in key_schema by executing a query and adding one...
Definition: Index.cpp:58
std::pair< key_type, value_type > entry_type
Definition: Index.hpp:67
void add(const key_type key, const value_type value)
Adds a single pair of key and value to the index.
Definition: Index.cpp:147
std::size_t value_type
Definition: Index.hpp:66
static std::string build_query(const Table &table, const Schema &schema)
Constructs a query string to select all attributes in schema from table.
Definition: Index.cpp:40
void finalize() override
Sorts the underlying vector, builds the linear models, and flags the index as finalized.
Definition: Index.cpp:163
base_type::entry_type entry_type
Definition: Index.hpp:167
Signals that an argument to a function of method was invalid.
Definition: exception.hpp:37
Signals a runtime error that mu*t*able is not responsible for and that mu*t*able was not able to reco...
Definition: exception.hpp:49