mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
QueryGraph.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <cstdint>
4#include <cstring>
5#include <deque>
6#include <functional>
7#include <memory>
9#include <mutable/IR/CNF.hpp>
10#include <mutable/mutable-config.hpp>
12#include <mutable/util/ADT.hpp>
13#include <sstream>
14#include <string>
15#include <vector>
16
17
18namespace m {
19
20namespace ast {
21
22struct Stmt;
23
24}
25
26struct DataSource;
27struct Join;
28struct QueryGraph;
29
32struct M_EXPORT DataSource
33{
34 friend struct QueryGraph;
35 friend struct GraphBuilder;
36 friend struct Decorrelation;
37
38 private:
40 std::vector<std::reference_wrapper<Join>> joins_;
42 std::size_t id_;
43
44 bool decorrelated_ = true;
45
46 protected:
47 DataSource(std::size_t id, ThreadSafePooledOptionalString alias) : alias_(std::move(alias)), id_(id) {
48 if (alias_.has_value() and strlen(*alias_) == 0)
49 throw invalid_argument("if the data source has an alias, it must not be empty");
50 }
51
52 public:
53 virtual ~DataSource() { }
54
56 std::size_t id() const { return id_; }
58 const ThreadSafePooledOptionalString & alias() const { return alias_; }
63 const cnf::CNF & filter() const { return filter_; }
65 void update_filter(cnf::CNF filter) { filter_ = filter_ and filter; }
67 void add_join(Join &join) { joins_.emplace_back(join); }
69 const auto & joins() const { return joins_; }
70
72 virtual bool is_correlated() const = 0;
73
74 private:
75 void remove_join(Join &join) {
76 auto it = std::find_if(joins_.begin(), joins_.end(), [&join](auto j) { return &j.get() == &join; });
77 if (it == joins_.end())
78 throw invalid_argument("given join not found");
79 joins_.erase(it);
80 }
81
82 public:
83 bool operator==(const DataSource &other) const { return this->id_ == other.id_; }
84 bool operator!=(const DataSource &other) const { return not operator==(other); }
85};
86
88struct M_EXPORT BaseTable : DataSource
89{
90 friend struct QueryGraph;
91 friend struct GetPrimaryKey;
92
93 private:
94 const Table &table_;
96 // std::vector<const ast::Designator*> expansion_;
97
98 private:
99 BaseTable(std::size_t id, ThreadSafePooledOptionalString alias, const Table &table)
100 : DataSource(id, std::move(alias)), table_(table)
101 { }
102
103 public:
105 const Table & table() const { return table_; }
106
107 ThreadSafePooledOptionalString name() const override { return alias().has_value() ? alias() : table_.name(); }
108
110 bool is_correlated() const override { return false; };
111};
112
115struct M_EXPORT Query : DataSource
116{
117 friend struct QueryGraph;
118
119 private:
120 std::unique_ptr<QueryGraph> query_graph_;
121
122 private:
123 Query(std::size_t id, ThreadSafePooledOptionalString alias, std::unique_ptr<QueryGraph> query_graph)
124 : DataSource(id, std::move(alias)), query_graph_(std::move(query_graph))
125 { }
126
127 public:
129 QueryGraph & query_graph() const { return *query_graph_; }
130
131 ThreadSafePooledOptionalString name() const override { return alias(); }
132
133 bool is_correlated() const override;
134};
135
137struct M_EXPORT Join
138{
139 using sources_t = std::vector<std::reference_wrapper<DataSource>>;
140
141 private:
144
145 public:
146 Join(cnf::CNF condition, sources_t sources) : condition_(std::move(condition)) , sources_(std::move(sources)) { }
147
149 const cnf::CNF & condition() const { return condition_; }
151 void update_condition(cnf::CNF update) { condition_ = condition_ and update; }
153 const sources_t & sources() const { return sources_; }
154
155 bool operator==(const Join &other) const {
156 if (this->condition_ != other.condition_) return false;
157 if (this->sources().size() != other.sources().size()) return false;
158 for (auto &this_src : sources_) {
159 auto it = std::find_if(other.sources().begin(), other.sources().end(), [this_src](auto other_src) {
160 return &this_src.get() == &other_src.get();
161 });
162 if (it == other.sources().end()) return false;
163 }
164 return true;
165 }
166 bool operator!=(const Join &other) const { return not operator==(other); }
167};
168
171struct M_EXPORT QueryGraph
172{
173 friend struct GraphBuilder;
174 friend struct Decorrelation;
175 friend struct GetPrimaryKey;
176
178 using projection_type = std::pair<std::reference_wrapper<const ast::Expr>, ThreadSafePooledOptionalString>;
179 using order_type = std::pair<std::reference_wrapper<const ast::Expr>, bool>;
180 using group_type = std::pair<std::reference_wrapper<const ast::Expr>, ThreadSafePooledOptionalString>;
181
182 private:
183 std::vector<std::unique_ptr<DataSource>> sources_;
184 std::vector<std::unique_ptr<Join>> joins_;
185
186 std::vector<group_type> group_by_;
187 std::vector<std::reference_wrapper<const ast::FnApplicationExpr>> aggregates_;
188 std::vector<projection_type> projections_;
189 std::vector<order_type> order_by_;
190 struct { uint64_t limit = 0, offset = 0; } limit_;
191
192 mutable std::unique_ptr<AdjacencyMatrix> adjacency_matrix_;
193
196 std::vector<std::unique_ptr<ast::Expr>> custom_filter_exprs_;
197
198 public:
199 friend void swap(QueryGraph &first, QueryGraph &second) {
200 using std::swap;
201 swap(first.sources_, second.sources_);
202 swap(first.joins_, second.joins_);
203 swap(first.group_by_, second.group_by_);
204 swap(first.aggregates_, second.aggregates_);
205 swap(first.projections_, second.projections_);
206 swap(first.order_by_, second.order_by_);
207 swap(first.limit_, second.limit_);
209 swap(first.t_, second.t_);
211 }
212
213 QueryGraph();
214 ~QueryGraph();
215
216 QueryGraph(const QueryGraph&) = delete;
217 QueryGraph(QueryGraph &&other) : QueryGraph() { swap(*this, other); }
218
219 QueryGraph & operator=(QueryGraph &&other) { swap(*this, other); return *this; }
220
221 static std::unique_ptr<QueryGraph> Build(const ast::Stmt &stmt);
222
224 std::size_t num_sources() const { return sources_.size(); }
225
227 std::size_t num_joins() const { return joins_.size(); }
228
229 void add_source(std::unique_ptr<DataSource> source) {
230 source->id_ = sources_.size();
231 sources_.emplace_back(source.release());
232 }
234 std::unique_ptr<BaseTable> base(new BaseTable(sources_.size(), std::move(alias), table));
235 auto &ref = sources_.emplace_back(std::move(base));
236 return as<BaseTable>(*ref);
237 }
238 Query & add_source(ThreadSafePooledOptionalString alias, std::unique_ptr<QueryGraph> query_graph) {
239 std::unique_ptr<Query> Q(new Query(sources_.size(), std::move(alias), std::move(query_graph)));
240 auto &ref = sources_.emplace_back(std::move(Q));
241 return as<Query>(*ref);
242 }
243
244 std::unique_ptr<DataSource> remove_source(std::size_t id) {
245 auto it = std::next(sources_.begin(), id);
246 auto ds = std::move(*it);
247 M_insist(ds->id() == id, "IDs of sources must be sequential");
248 sources_.erase(it);
249 /* Decrement IDs of all sources after the deleted one to provide sequential IDs. */
250 while (it != sources_.end()) {
251 --(*it)->id_;
252 ++it;
253 }
254 return ds;
255 }
256
257 const auto & sources() const { return sources_; }
258 const auto & joins() const { return joins_; }
259 const auto & group_by() const { return group_by_; }
260 const auto & aggregates() const { return aggregates_; }
261 const std::vector<projection_type> & projections() const { return projections_; }
262 const auto & order_by() const { return order_by_; }
263 auto limit() const { return limit_; }
264
266 const DataSource & operator[](uint64_t id) const {
267 auto &ds = sources_[id];
268 M_insist(ds->id() == id, "given id and data source id must match");
269 return *ds;
270 }
271
273 bool grouping() const { return not group_by_.empty() or not aggregates_.empty(); }
275 bool is_correlated() const;
276
278 if (not adjacency_matrix_) [[unlikely]]
279 compute_adjacency_matrix();
280 return *adjacency_matrix_;
281 }
282
284 if (not adjacency_matrix_) [[unlikely]]
285 compute_adjacency_matrix();
286 return *adjacency_matrix_;
287 }
288
290 void dot(std::ostream &out) const;
291
293 void sql(std::ostream &out) const;
294
297 M_insist(not t_ or *t_ == *t, "QueryGraph is already linked to another transaction");
298 t_ = t;
299
300 for (auto &ds : sources_)
301 if (auto bt = cast<const Query>(ds.get()))
302 bt->query_graph_->transaction(t_);
303 }
305 Scheduler::Transaction * transaction() const { return t_; }
306
308 void add_custom_filter(std::unique_ptr<ast::Expr> filter_expr, DataSource &ds) {
309 M_insist(std::find_if(sources_.begin(), sources_.end(), [&](std::unique_ptr<DataSource> &source){
310 return *source == ds;
311 }) != sources_.end());
312
313 auto filter = cnf::to_CNF(*filter_expr);
314 ds.update_filter(filter);
315 custom_filter_exprs_.push_back(std::move(filter_expr));
316 }
317
318
319 void dump(std::ostream &out) const;
320 void dump() const;
321
322 private:
323 void compute_adjacency_matrix() const;
324 void dot_recursive(std::ostream &out) const;
325
326 void remove_join(Join &join) {
327 auto it = std::find_if(joins_.begin(), joins_.end(), [&join](auto &j) { return j.get() == &join; });
328 if (it == joins_.end())
329 throw invalid_argument("given join not found");
330 joins_.erase(it);
331 }
332};
333
334}
#define id(X)
and(sizeof(T)==4) U64x1 reinterpret_to_U64(m
Definition: WasmAlgo.cpp:266
#define M_insist(...)
Definition: macro.hpp:129
‍mutable namespace
Definition: Backend.hpp:10
void swap(PlanTableBase< Actual > &first, PlanTableBase< Actual > &second)
Definition: PlanTable.hpp:394
ThreadSafeStringPool::proxy_optional_type ThreadSafePooledOptionalString
Definition: Pool.hpp:465
STL namespace.
An adjacency matrix for a given query graph.
A BaseTable is a DataSource that is materialized and stored persistently by the database system.
Definition: QueryGraph.hpp:89
bool is_correlated() const override
BaseTable is never correlated.
Definition: QueryGraph.hpp:110
ThreadSafePooledOptionalString name() const override
Returns the name of this DataSource.
Definition: QueryGraph.hpp:107
const Table & table() const
Returns a reference to the Table providing the tuples.
Definition: QueryGraph.hpp:105
BaseTable(std::size_t id, ThreadSafePooledOptionalString alias, const Table &table)
‍list of designators expanded from GetPrimaryKey::compute() or GetAttributes::compute()
Definition: QueryGraph.hpp:99
const Table & table_
the table providing the tuples
Definition: QueryGraph.hpp:94
A DataSource in a QueryGraph.
Definition: QueryGraph.hpp:33
std::size_t id_
unique identifier of this data source within its query graph
Definition: QueryGraph.hpp:42
const cnf::CNF & filter() const
Returns the filter of this DataSource.
Definition: QueryGraph.hpp:63
virtual ThreadSafePooledOptionalString name() const =0
Returns the name of this DataSource.
const ThreadSafePooledOptionalString & alias() const
Returns the alias of this DataSource.
Definition: QueryGraph.hpp:58
const auto & joins() const
Returns a reference to the Joins using this DataSource.
Definition: QueryGraph.hpp:69
DataSource(std::size_t id, ThreadSafePooledOptionalString alias)
Definition: QueryGraph.hpp:47
std::vector< std::reference_wrapper< Join > > joins_
joins with this data source
Definition: QueryGraph.hpp:40
virtual bool is_correlated() const =0
Returns true iff the data source is correlated.
ThreadSafePooledOptionalString alias_
alias of this data source, may not have a value if this data source has no alias
Definition: QueryGraph.hpp:41
void update_filter(cnf::CNF filter)
Adds filter to the current filter of this DataSource by logical conjunction.
Definition: QueryGraph.hpp:65
cnf::CNF filter_
filter condition on this data source
Definition: QueryGraph.hpp:39
virtual ~DataSource()
Definition: QueryGraph.hpp:53
void remove_join(Join &join)
Definition: QueryGraph.hpp:75
bool operator==(const DataSource &other) const
Definition: QueryGraph.hpp:83
void add_join(Join &join)
Adds join to the set of Joins of this DataSource.
Definition: QueryGraph.hpp:67
std::size_t id() const
Returns the id of this DataSource.
Definition: QueryGraph.hpp:56
bool operator!=(const DataSource &other) const
Definition: QueryGraph.hpp:84
A Join in a QueryGraph combines DataSources by a join condition.
Definition: QueryGraph.hpp:138
std::vector< std::reference_wrapper< DataSource > > sources_t
Definition: QueryGraph.hpp:139
const cnf::CNF & condition() const
Returns the join condition.
Definition: QueryGraph.hpp:149
bool operator==(const Join &other) const
Definition: QueryGraph.hpp:155
sources_t sources_
the sources to join
Definition: QueryGraph.hpp:143
cnf::CNF condition_
join condition
Definition: QueryGraph.hpp:142
void update_condition(cnf::CNF update)
Adds condition to the current condition of this Join by logical conjunction.
Definition: QueryGraph.hpp:151
bool operator!=(const Join &other) const
Definition: QueryGraph.hpp:166
Join(cnf::CNF condition, sources_t sources)
Definition: QueryGraph.hpp:146
const sources_t & sources() const
Returns a reference to the joined DataSources.
Definition: QueryGraph.hpp:153
The query graph represents all data sources and joins in a graph structure.
Definition: QueryGraph.hpp:172
std::size_t num_joins() const
Returns the number of Joins in this graph.
Definition: QueryGraph.hpp:227
std::vector< order_type > order_by_
the order
Definition: QueryGraph.hpp:189
friend void swap(QueryGraph &first, QueryGraph &second)
Definition: QueryGraph.hpp:199
struct m::QueryGraph::@0 limit_
limit: limit and offset
QueryGraph & operator=(QueryGraph &&other)
Definition: QueryGraph.hpp:219
std::vector< std::unique_ptr< Join > > joins_
collection of all joins in this query graph
Definition: QueryGraph.hpp:184
const auto & group_by() const
Definition: QueryGraph.hpp:259
void add_source(std::unique_ptr< DataSource > source)
Definition: QueryGraph.hpp:229
const std::vector< projection_type > & projections() const
Definition: QueryGraph.hpp:261
const DataSource & operator[](uint64_t id) const
Returns a data souce given its id.
Definition: QueryGraph.hpp:266
QueryGraph(QueryGraph &&other)
Definition: QueryGraph.hpp:217
bool grouping() const
Returns true iff the graph contains a grouping.
Definition: QueryGraph.hpp:273
Scheduler::Transaction * transaction() const
Returns the transaction ID.
Definition: QueryGraph.hpp:305
std::vector< std::unique_ptr< ast::Expr > > custom_filter_exprs_
‍Stores the expressions of custom filters that have been added to the DataSources after semantic anal...
Definition: QueryGraph.hpp:196
const auto & joins() const
Definition: QueryGraph.hpp:258
std::pair< std::reference_wrapper< const ast::Expr >, ThreadSafePooledOptionalString > projection_type
Definition: QueryGraph.hpp:178
std::pair< std::reference_wrapper< const ast::Expr >, bool > order_type
true means ascending, false means descending
Definition: QueryGraph.hpp:179
QueryGraph(const QueryGraph &)=delete
const AdjacencyMatrix & adjacency_matrix() const
Definition: QueryGraph.hpp:283
std::size_t num_sources() const
Returns the number of DataSources in this graph.
Definition: QueryGraph.hpp:224
void add_custom_filter(std::unique_ptr< ast::Expr > filter_expr, DataSource &ds)
Creates a cnf::CNF from filter_expr and adds it to the current filter of the given DataSource ds by l...
Definition: QueryGraph.hpp:308
const auto & aggregates() const
Definition: QueryGraph.hpp:260
void remove_join(Join &join)
Definition: QueryGraph.hpp:326
const auto & order_by() const
Definition: QueryGraph.hpp:262
std::vector< std::reference_wrapper< const ast::FnApplicationExpr > > aggregates_
the aggregates to compute
Definition: QueryGraph.hpp:187
const auto & sources() const
Definition: QueryGraph.hpp:257
std::unique_ptr< DataSource > remove_source(std::size_t id)
Definition: QueryGraph.hpp:244
std::vector< projection_type > projections_
the data to compute
Definition: QueryGraph.hpp:188
auto limit() const
Definition: QueryGraph.hpp:263
BaseTable & add_source(ThreadSafePooledOptionalString alias, const Table &table)
Definition: QueryGraph.hpp:233
std::pair< std::reference_wrapper< const ast::Expr >, ThreadSafePooledOptionalString > group_type
Definition: QueryGraph.hpp:180
void transaction(Scheduler::Transaction *t)
Set the transaction ID.
Definition: QueryGraph.hpp:296
Scheduler::Transaction * t_
the transaction this query graph belongs to
Definition: QueryGraph.hpp:194
std::unique_ptr< AdjacencyMatrix > adjacency_matrix_
Definition: QueryGraph.hpp:192
std::vector< group_type > group_by_
the grouping keys
Definition: QueryGraph.hpp:186
std::vector< std::unique_ptr< DataSource > > sources_
collection of all data sources in this query graph
Definition: QueryGraph.hpp:183
Query & add_source(ThreadSafePooledOptionalString alias, std::unique_ptr< QueryGraph > query_graph)
Definition: QueryGraph.hpp:238
AdjacencyMatrix & adjacency_matrix()
Definition: QueryGraph.hpp:277
A Query in a QueryGraph is a DataSource that represents a nested query.
Definition: QueryGraph.hpp:116
Query(std::size_t id, ThreadSafePooledOptionalString alias, std::unique_ptr< QueryGraph > query_graph)
Definition: QueryGraph.hpp:123
std::unique_ptr< QueryGraph > query_graph_
query graph of the sub-query
Definition: QueryGraph.hpp:120
ThreadSafePooledOptionalString name() const override
Returns the name of this DataSource.
Definition: QueryGraph.hpp:131
QueryGraph & query_graph() const
Returns a reference to the internal QueryGraph.
Definition: QueryGraph.hpp:129
Implements a small and efficient set over integers in the range of 0 to 63 (including).
Definition: ADT.hpp:26
A table is a sorted set of attributes.
Definition: Schema.hpp:388
virtual const ThreadSafePooledString & name() const =0
Returns the name of the Table.
A SQL statement.
Definition: AST.hpp:794
A CNF represents a conjunction of cnf::Clauses.
Definition: CNF.hpp:134
Signals that an argument to a function of method was invalid.
Definition: exception.hpp:37