mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
CardinalityEstimator.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <fstream>
4#include <iostream>
5#include <mutable/mutable-config.hpp>
9#include <sstream>
10#include <unordered_map>
11#include <vector>
12
13
14namespace m {
15
16namespace ast {
17
18struct Expr;
19
20}
21
22namespace cnf { struct CNF; }
23struct Diagnostic;
24struct GroupingOperator;
25struct LimitOperator;
26struct Operator;
29struct QueryGraph;
30struct SpnWrapper;
31
33
34
42struct M_EXPORT DataModel
43{
44 virtual ~DataModel() = 0;
45
47 virtual void assign_to(Subproblem s) = 0;
48};
49
50
51struct M_EXPORT estimate_join_all_tag : const_virtual_crtp_helper<estimate_join_all_tag>::
52 returns<std::unique_ptr<DataModel>>::
53 crtp_args<const PlanTableSmallOrDense&, const PlanTableLargeAndSparse&>::
54 args<const QueryGraph&, Subproblem, const cnf::CNF&> { };
55
56struct M_EXPORT CardinalityEstimator : estimate_join_all_tag::base_type
57{
58 using estimate_join_all_tag::base_type::operator();
59 using group_type = std::pair<std::reference_wrapper<const ast::Expr>, ThreadSafePooledOptionalString>;
60
63 {
64 explicit data_model_exception(std::string message) : m::exception(std::move(message)) { }
65 };
66
67 virtual ~CardinalityEstimator() = 0;
68
69
70 /*==================================================================================================================
71 * Model calculation
72 *================================================================================================================*/
73
75 virtual std::unique_ptr<DataModel> empty_model() const = 0;
76
83 virtual std::unique_ptr<DataModel> estimate_scan(const QueryGraph &G, Subproblem P) const = 0;
84
91 virtual std::unique_ptr<DataModel> estimate_filter(const QueryGraph &G, const DataModel &data,
92 const cnf::CNF &filter) const = 0;
93
101 virtual std::unique_ptr<DataModel>
102 estimate_limit(const QueryGraph &G, const DataModel &data, const std::size_t limit,
103 const std::size_t offset) const = 0;
104
111 virtual std::unique_ptr<DataModel>
112 estimate_grouping(const QueryGraph &G, const DataModel &data, const std::vector<group_type> &groups) const = 0;
113
121 virtual std::unique_ptr<DataModel>
122 estimate_join(const QueryGraph &G, const DataModel &left, const DataModel &right,
123 const cnf::CNF &condition) const = 0;
124
126 template<typename PlanTable>
127 std::unique_ptr<DataModel>
128 estimate_join_all(const QueryGraph &G, const PlanTable &PT, Subproblem to_join, const cnf::CNF &condition) const
129 {
130 return operator()(estimate_join_all_tag{}, PT, G, to_join, condition);
131 }
132
133
134 /*==================================================================================================================
135 * Prediction via model use
136 *================================================================================================================*/
137
138 virtual std::size_t predict_cardinality(const DataModel &data) const = 0;
139
140 virtual double predict_number_distinct_values(const DataModel &data) const;
141
142 /*==================================================================================================================
143 * other methods
144 *================================================================================================================*/
145
147 friend std::ostream & operator<<(std::ostream &out, const CardinalityEstimator &CE) {
148 CE.print(out);
149 return out;
150 }
152
153 void dump(std::ostream &out) const;
154
155 void dump() const;
156
157 protected:
158 virtual void print(std::ostream &out) const = 0;
159};
160
161namespace {
162
163template<typename Actual>
164struct CardinalityEstimatorCRTP : CardinalityEstimator
165 , estimate_join_all_tag::derived_type<Actual>
166{ };
167
168}
169
173struct M_EXPORT CartesianProductEstimator : CardinalityEstimatorCRTP<CartesianProductEstimator>
174{
176 {
177 std::size_t size;
178
180 CartesianProductDataModel(std::size_t size) : size(size) { }
181
182 void assign_to(Subproblem) override { /* nothing to be done */ }
183 };
184
187
188
189 /*==================================================================================================================
190 * Model calculation
191 *================================================================================================================*/
192
193 std::unique_ptr<DataModel> empty_model() const override;
194 std::unique_ptr<DataModel> estimate_scan(const QueryGraph &G, Subproblem P) const override;
195 std::unique_ptr<DataModel>
196 estimate_filter(const QueryGraph &G, const DataModel &data, const cnf::CNF &filter) const override;
197 std::unique_ptr<DataModel>
198 estimate_limit(const QueryGraph &G, const DataModel &data, std::size_t limit, std::size_t offset) const override;
199 std::unique_ptr<DataModel>
200 estimate_grouping(const QueryGraph &G, const DataModel &data, const std::vector<group_type> &groups) const override;
201 std::unique_ptr<DataModel>
202 estimate_join(const QueryGraph &G, const DataModel &left, const DataModel &right,
203 const cnf::CNF &condition) const override;
204
205 template<typename PlanTable>
206 std::unique_ptr<DataModel>
207 operator()(estimate_join_all_tag, PlanTable &&PT, const QueryGraph &G, Subproblem to_join,
208 const cnf::CNF &condition) const;
209
210
211 /*==================================================================================================================
212 * Prediction via model use
213 *================================================================================================================*/
214
215 std::size_t predict_cardinality(const DataModel &data) const override;
216
217 private:
218 void print(std::ostream &out) const override;
219};
220
227struct M_EXPORT InjectionCardinalityEstimator : CardinalityEstimatorCRTP<InjectionCardinalityEstimator>
228{
230
232 {
234
235 private:
237 std::size_t size_;
238
239 public:
240 InjectionCardinalityDataModel(Subproblem S, std::size_t size) : subproblem_(S), size_(size) { }
245
246 void assign_to(Subproblem s) override { subproblem_ = s; }
247 };
248
249 private:
251 mutable std::vector<char> buf_;
253 mutable std::ostringstream oss_;
254
255 std::unordered_map<ThreadSafePooledString, std::size_t> cardinality_table_;
257
258 public:
265
271 InjectionCardinalityEstimator(Diagnostic &diag, ThreadSafePooledString name_of_database, std::istream &in);
272
274
277
279
280
281 /*==================================================================================================================
282 * Model calculation
283 *================================================================================================================*/
284
285 std::unique_ptr<DataModel> empty_model() const override;
286 std::unique_ptr<DataModel> estimate_scan(const QueryGraph &G, Subproblem P) const override;
287 std::unique_ptr<DataModel>
288 estimate_filter(const QueryGraph &G, const DataModel &data, const cnf::CNF &filter) const override;
289 std::unique_ptr<DataModel>
290 estimate_limit(const QueryGraph &G, const DataModel &data, std::size_t limit, std::size_t offset) const override;
291 std::unique_ptr<DataModel>
292 estimate_grouping(const QueryGraph &G, const DataModel &data, const std::vector<group_type> &groups) const override;
293 std::unique_ptr<DataModel>
294 estimate_join(const QueryGraph &G, const DataModel &left, const DataModel &right,
295 const cnf::CNF &condition) const override;
296
297 template<typename PlanTable>
298 std::unique_ptr<DataModel>
299 operator()(estimate_join_all_tag, PlanTable &&PT, const QueryGraph &G, Subproblem to_join,
300 const cnf::CNF &condition) const;
301
302 /*==================================================================================================================
303 * Prediction via model use
304 *================================================================================================================*/
305
306 std::size_t predict_cardinality(const DataModel &data) const override;
307
308 private:
309 void read_json(Diagnostic &diag, std::istream &in, const ThreadSafePooledString &name_of_database);
310 void print(std::ostream &out) const override;
311 void buf_append(const char *s) const { while (*s) buf_.emplace_back(*s++); }
312 void buf_append(const std::string &s) const {
313 buf_.reserve(buf_.size() + s.size());
314 buf_append(s.c_str());
315 }
316 const char * buf_view() const { return buf_.data(); }
317 ThreadSafePooledString make_identifier(const QueryGraph &G, const Subproblem S) const;
318};
319
323struct M_EXPORT SpnEstimator : CardinalityEstimatorCRTP<SpnEstimator>
324{
325 using SpnIdentifier = std::pair<ThreadSafePooledString, ThreadSafePooledString>;
326 using SpnJoin = std::pair<SpnIdentifier, SpnIdentifier>;
327 using table_spn_map = std::unordered_map<ThreadSafePooledString, std::reference_wrapper<const SpnWrapper>>;
328
330 {
331 friend struct SpnEstimator;
332
333 private:
335 std::vector<std::size_t> max_frequencies_;
336 std::size_t num_rows_;
337
338 public:
339 SpnDataModel() = default;
340 SpnDataModel(table_spn_map spns, std::size_t num_rows)
341 : spns_(std::move(spns))
342 , num_rows_(num_rows)
343 { }
344
345 void assign_to(Subproblem) override { /* nothing to be done */ }
346 };
347
348 private:
350 std::unordered_map<ThreadSafePooledString, SpnWrapper*> table_to_spn_;
353
354 public:
355 explicit SpnEstimator(ThreadSafePooledString name_of_database) : name_of_database_(std::move(name_of_database)) { }
356
358
360 void learn_spns();
361
363 void learn_new_spn(const ThreadSafePooledString &name_of_table);
364
365 private:
372 static std::pair<unsigned, bool> find_spn_id(const SpnDataModel &data, SpnJoin &join);
373
380 static std::size_t max_frequency(const SpnDataModel &data, SpnJoin &join);
381
388 static std::size_t max_frequency(const SpnDataModel &data, const ThreadSafePooledString &attribute);
389
390 /*==================================================================================================================
391 * Model calculation
392 *================================================================================================================*/
393
394 public:
395 std::unique_ptr<DataModel> empty_model() const override;
396 std::unique_ptr<DataModel> estimate_scan(const QueryGraph &G, Subproblem P) const override;
397 std::unique_ptr<DataModel>
398 estimate_filter(const QueryGraph &G, const DataModel &data, const cnf::CNF &filter) const override;
399 std::unique_ptr<DataModel>
400 estimate_limit(const QueryGraph &G, const DataModel &data, std::size_t limit, std::size_t offset) const override;
401 std::unique_ptr<DataModel>
402 estimate_grouping(const QueryGraph &G, const DataModel &data, const std::vector<group_type> &groups) const override;
403 std::unique_ptr<DataModel>
404 estimate_join(const QueryGraph &G, const DataModel &left, const DataModel &right,
405 const cnf::CNF &condition) const override;
406
407 template<typename PlanTable>
408 std::unique_ptr<DataModel>
409 operator()(estimate_join_all_tag, PlanTable &&PT, const QueryGraph &G, Subproblem to_join,
410 const cnf::CNF &condition) const;
411
412
413 /*==================================================================================================================
414 * Prediction via model use
415 *================================================================================================================*/
416
417 std::size_t predict_cardinality(const DataModel &data) const override;
418
419 private:
420 void print(std::ostream &out) const override;
421};
422
423}
struct @5 args
‍mutable namespace
Definition: Backend.hpp:10
ThreadSafeStringPool::proxy_type ThreadSafePooledString
Definition: Pool.hpp:464
ThreadSafeStringPool::proxy_optional_type ThreadSafePooledOptionalString
Definition: Pool.hpp:465
STL namespace.
data_model_exception is thrown if a DataModel implementation does not contain the requested informati...
virtual std::unique_ptr< DataModel > estimate_filter(const QueryGraph &G, const DataModel &data, const cnf::CNF &filter) const =0
Applies a filter to a DataModel.
virtual std::unique_ptr< DataModel > estimate_grouping(const QueryGraph &G, const DataModel &data, const std::vector< group_type > &groups) const =0
Groups data in the DataModel.
virtual std::unique_ptr< DataModel > empty_model() const =0
Returns a DataModel representing the empty set.
std::unique_ptr< DataModel > estimate_join_all(const QueryGraph &G, const PlanTable &PT, Subproblem to_join, const cnf::CNF &condition) const
Compute a DataModel for the result of joining all DataSources in to_join by condition.
virtual std::unique_ptr< DataModel > estimate_scan(const QueryGraph &G, Subproblem P) const =0
Creates a DataModel for a single DataSource.
virtual std::unique_ptr< DataModel > estimate_limit(const QueryGraph &G, const DataModel &data, const std::size_t limit, const std::size_t offset) const =0
Extracts a subset from a DataModel.
M_LCOV_EXCL_START friend std::ostream & operator<<(std::ostream &out, const CardinalityEstimator &CE)
virtual std::unique_ptr< DataModel > estimate_join(const QueryGraph &G, const DataModel &left, const DataModel &right, const cnf::CNF &condition) const =0
Form a new DataModel by joining two DataModels.
virtual void print(std::ostream &out) const =0
virtual std::size_t predict_cardinality(const DataModel &data) const =0
std::pair< std::reference_wrapper< const ast::Expr >, ThreadSafePooledOptionalString > group_type
void assign_to(Subproblem) override
Assigns this to the Subproblem s, i.e.
DummyEstimator that always returns the size of the cartesian product of the given subproblems.
CartesianProductEstimator(ThreadSafePooledString)
A DataModel describes a data set.
virtual void assign_to(Subproblem s)=0
Assigns this to the Subproblem s, i.e.
InjectionCardinalityDataModel & operator=(const InjectionCardinalityDataModel &other)=default
InjectionCardinalityDataModel(const InjectionCardinalityDataModel &)=default
InjectionCardinalityDataModel(InjectionCardinalityDataModel &&)=default
InjectionCardinalityDataModel & operator=(InjectionCardinalityDataModel &&other)=default
void assign_to(Subproblem s) override
Assigns this to the Subproblem s, i.e.
InjectionCardinalityEstimator that estimates cardinalities based on a table that contains sizes for t...
std::unordered_map< ThreadSafePooledString, std::size_t > cardinality_table_
std::vector< char > buf_
‍buffer used to construct identifiers
InjectionCardinalityEstimator & operator=(InjectionCardinalityEstimator &&)=default
InjectionCardinalityEstimator(const InjectionCardinalityEstimator &)=delete
void buf_append(const std::string &s) const
std::ostringstream oss_
‍buffer used to construct identifiers
void buf_append(const char *s) const
InjectionCardinalityEstimator(InjectionCardinalityEstimator &&)=default
An Operator represents an operation in a query plan.
Definition: Operator.hpp:45
This table represents all explored plans with their sub-plans, estimated size, cost,...
Definition: PlanTable.hpp:284
This table represents all explored plans with their sub-plans, estimated size, cost,...
Definition: PlanTable.hpp:180
The query graph represents all data sources and joins in a graph structure.
Definition: QueryGraph.hpp:172
Implements a small and efficient set over integers in the range of 0 to 63 (including).
Definition: ADT.hpp:26
SpnDataModel(table_spn_map spns, std::size_t num_rows)
table_spn_map spns_
a map from table to Spn
void assign_to(Subproblem) override
Assigns this to the Subproblem s, i.e.
std::vector< std::size_t > max_frequencies_
the maximum frequencies of values of attributes to join
SpnEstimator that estimates cardinalities based on Sum-Product Networks.
std::unordered_map< ThreadSafePooledString, SpnWrapper * > table_to_spn_
‍the map from every table to its respective Spn, initially empty
ThreadSafePooledString name_of_database_
‍the name of the database, the estimator is built on
std::pair< SpnIdentifier, SpnIdentifier > SpnJoin
std::unordered_map< ThreadSafePooledString, std::reference_wrapper< const SpnWrapper > > table_spn_map
std::pair< ThreadSafePooledString, ThreadSafePooledString > SpnIdentifier
SpnEstimator(ThreadSafePooledString name_of_database)
A wrapper class for an Spn to be used in the context of databases.
Definition: SpnWrapper.hpp:13
A helper class to introduce a virtual method overload per type to a class hierarchy.
Definition: crtp.hpp:93
An expression.
Definition: AST.hpp:39
A CNF represents a conjunction of cnf::Clauses.
Definition: CNF.hpp:134
Learn an SPN on every table in the database that is currently in use.