mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
QueryGraph.cpp
Go to the documentation of this file.
1#include "IR/QueryGraph.hpp"
2
4#include "parse/ASTDumper.hpp"
6#include <mutable/IR/CNF.hpp>
9
10
11using namespace m;
12
13
14struct Decorrelation;
15
16
17/*======================================================================================================================
18 * Query
19 *====================================================================================================================*/
20
21bool Query::is_correlated() const { return query_graph_->is_correlated(); }
22
23
24/*======================================================================================================================
25 * Decorrelation of nested queries
26 * -------------------------------
27 *
28 * DEFINITION: A predicate is *correlated* if it contains at least one *free* variable *and* at least one *bound*
29 * variable.
30 *
31 * NOTE: A free variable *must* be bound by an outer statement, otherwise the query is ill-formed (and should not have
32 * passed semantic analysis).
33 *
34 * EXAMPLES:
35 * (1)
36 * ```
37 * SELECT T.id
38 * FROM T
39 * WHERE T.n = (
40 * SELECT COUNT(*)
41 * FROM S
42 * WHERE S.x = T.y -- T.y is a free variable, bound by the outer statement.
43 * )
44 * ```
45 *
46 * ```
47 * ⧑ (T.n = S.$agg0) ← dependent join
48 * / \
49 * T Γ (; COUNT(*) AS $agg0)
50 * |
51 * σ (S.x = T.y) ← correlated predicate
52 * |
53 * S
54 * ```
55 *
56 * To decorrelate T.y, group S by S.x, then lift join condition above grouping:
57 *
58 * ```
59 * SELECT T.id
60 * FROM T, (
61 * SELECT S.x, COUNT(*) AS $agg0
62 * FROM S
63 * GROUP BY S.x -- group by the correlated equi-predicate
64 * ) AS S
65 * WHERE S.x = T.y -- unnest the inner query by joining with outer on the equi-predicate
66 * AND T.n = S.$agg0 -- and join on the transformed original predicate
67 * ```
68 *
69 * ```
70 * ⨝ (S.x = T.y AND T.n = S.$agg0) ← regular join
71 * / \
72 * T Γ (S.x; COUNT(*) AS $agg0) ← group by attribute of correlated predicate
73 * |
74 * S
75 * ```
76 *
77 * (2)
78 * ```
79 * SELECT T.id
80 * FROM T
81 * WHERE T.n = (
82 * SELECT COUNT(*)
83 * FROM S
84 * WHERE S.x = ISNULL(T.y) -- T.y is a free variable, bound by the outer statement.
85 * )
86 * ```
87 *
88 * ```
89 * ⧑ (T.n = S.$agg0) ← dependent join
90 * / \
91 * T Γ (; COUNT(*) AS $agg0)
92 * |
93 * σ (S.x = ISNULL(T.y)) ← correlated predicate
94 * |
95 * S
96 * ```
97 *
98 * To decorrelate T.y, group S by S.x, then lift join condition above grouping:
99 *
100 * ```
101 * SELECT T.id
102 * FROM T, (
103 * SELECT S.x, COUNT(*) as $agg0
104 * FROM S
105 * GROUP BY S.x -- group by the correlated equi-predicate
106 * ) AS S
107 * WHERE S.x = ISNULL(T.y) -- unnest the inner query by joining with outer on the equi-predicate
108 * AND T.n = S.$agg0 -- and join on the transformed original predicate
109 * ```
110 *
111 * ```
112 * ⨝ (S.x = ISNULL(T.y) AND T.n = S.$agg0) ← dependent join
113 * / \
114 * T Γ (S.x; COUNT(*) AS $agg0) ← group by attribute of correlated predicate
115 * |
116 * S
117 * ```
118 *
119 * (3)
120 * ```
121 * SELECT T.id
122 * FROM T
123 * WHERE T.n = (
124 * SELECT COUNT(*)
125 * FROM S
126 * WHERE ISNULL(S.x) = T.y -- T.y is a free variable, bound by the outer statement.
127 * )
128 * ```
129 *
130 * is converted to
131 *
132 * ```
133 * SELECT T.id
134 * FROM T, (
135 * SELECT $expr0, COUNT(*) AS $agg0
136 * FROM S
137 * GROUP BY ISNULL(S.x) AS $expr0
138 * ) AS S
139 * WHERE S.$expr0 = T.y
140 * AND T.n = S.$agg0
141 * ```
142 *
143 * (4)
144 * ```
145 * SELECT T.id
146 * FROM T
147 * WHERE EXISTS (
148 * SELECT 1
149 * FROM S
150 * WHERE S.a + S.b = T.c -- T.c is a free variable, bound by the outer statement.
151 * )
152 * ```
153 *
154 * ```
155 * ⧑ (TRUE) ← dependent join without condition (not a Cartesian product!)
156 * / \
157 * T EXISTS
158 * |
159 * σ (S.a + S.b = T.c) ← correlated predicate
160 * |
161 * S
162 * ```
163 *
164 * is converted to
165 *
166 * ```
167 * SELECT T.id
168 * FROM T, (
169 * SELECT $expr0
170 * FROM S
171 * GROUP BY S.a + S.b AS $expr0
172 * ) AS S
173 * WHERE T.c = S.$expr0
174 * ```
175 *
176 * ```
177 * ⨝ (T.c = S.$expr0)
178 * / \
179 * T Γ (S.a + S.b AS $expr0; )
180 * |
181 * S
182 * ```
183 *
184 * (5)
185 * ```
186 * SELECT T.id
187 * FROM T
188 * WHERE EXISTS (
189 * SELECT 1
190 * FROM S
191 * WHERE S.a + T.b = S.c -- T.b is a free variable, bound by the outer statement.
192 * )
193 * ```
194 *
195 * ```
196 * ⧑ (TRUE) ← dependent join without condition (not a Cartesian product!)
197 * / \
198 * T EXISTS
199 * |
200 * σ (S.a + T.b = S.c) ← correlated predicate
201 * |
202 * S
203 * ```
204 *
205 * **cannot** be converted to
206 *
207 * ```
208 * SELECT T.id
209 * FROM T, (
210 * SELECT S.a, S.c
211 * FROM S
212 * GROUP BY S.a, S.c -- FIXME This is incorrect. We should transform the predicate and group by S.c - S.a
213 * ) AS S
214 * WHERE S.a + T.b = S.c -- TODO Hypothetically, we must rewrite to T.b = S.c - S.a to have single group
215 * ```
216 *
217 * ```
218 * ⨝ (T.b = S.a + S.c)
219 * / \
220 * T Γ (S.a, S.b; )
221 * |
222 * S
223 * ```
224 *
225 * (6)
226 * ```
227 * SELECT T.id
228 * FROM T
229 * WHERE T.max = (
230 * SELECT MAX(S.a * T.b)
231 * FROM S
232 * WHERE S.c = T.c
233 * )
234 * ```
235 *
236 * is converted to
237 *
238 * ```
239 * SELECT T.id
240 * FROM T, (
241 * SELECT S.a, S.c
242 * FROM S
243 * GROUP BY S.a, S.c
244 * ) AS S
245 * WHERE T.max = S.a * T.b
246 * AND T.c = S.c
247 * ```
248 *
249 * (7)
250 * ```
251 * SELECT T.id
252 * FROM T
253 * WHERE T.max != ( -- non-equi predicate here is valid, its not a correlated predicate
254 * SELECT COUNT(*)
255 * FROM S
256 * WHERE S.x = T.y
257 * )
258 * ```
259 *
260 * is converted to
261 *
262 * ```
263 * SELECT T.id
264 * FROM T, (
265 * SELECT S.x, COUNT(*)
266 * FROM S
267 * GROUP BY S.x
268 * ) AS S
269 * WHERE T.max != S.x
270 * AND S.x = T.y
271 * ```
272 *
273 * (8)
274 * ```
275 * SELECT T.id
276 * FROM T
277 * WHERE T.max == (
278 * SELECT COUNT(*)
279 * FROM S
280 * WHERE S.x != T.y -- non-equi predicate here is correlated
281 * )
282 * ```
283 *
284 * ```
285 * ⧑ (T.max = $agg0) ← dependent join
286 * / \
287 * T Γ (; COUNT(*) AS $agg0)
288 * |
289 * σ (S.x != T.y) ← correlated predicate
290 * |
291 * S
292 * ```
293 *
294 * We currently can't do better than a dependent join. We cannout lift the correlated predicate above the grouping
295 * of the correlated query.
296 * We could lift the correlated predicate and the grouping above the dependent join. This would transform the
297 * dependent join to a regular join with the correlated predicate as join predicate and the original join predicate
298 * hoisted above grouping. See below:
299 *
300 * ```
301 * σ (T.max = $agg0) ← dependent join
302 * |
303 * Γ (T.id; COUNT(*) AS $agg0) ← grouping by PK of T implies grouping by all of T
304 * |
305 * ⨝ (S.x != T.y) ← formerly correlated predicate becomes decorrelated join predicate
306 * / \
307 * T S
308 * ```
309 *
310 * TASKS:
311 *
312 * (1) Analyze the WHERE and HAVING clauses of a query for correlations.
313 *
314 * (2) To analyze a clause, we must analyze each predicate individually. So far, we only support a single predicate
315 * inside a correlated clause. Abort, if a correlated clause has more than one predicate.
316 *
317 * (3) For each predicate, check whether it has free variables. If this is not the case, we treat it as a regular
318 * predicate.
319 *
320 * (4) If all predicates of a clause contain solely *free* variables, the entire clause can be lifted out of the nested
321 * query and into the statement that *binds* the free variables.
322 *
323 * (5) If the predicate is correlated, we recursively analyze the predicate's expression (pre order). If we find an
324 * *n*-ary expression (e.g. binary or function application) that is correlated, we abort. The reasoning is that *n*-ary
325 * expressions cannot be broken apart into uncorrelated subexpression *while preserving query semantics*. An
326 * **important exception** to this rule are equi-predicates (i.e. `=`).
327 *
328 * (6) If the correlated expression is an equi-predicate of the form E1 = E2, recursively analyze E1 and E2. If E1 or E2
329 * are correlated, we abort (see (5)). If both E1 and E2 are uncorrelated, then one expression contains only bound
330 * variables while the other contains only free variables. We can then transform the entire statement to decorrelate
331 * the predicate.
332 *
333 * (7) To decorrelate the predicate, group the correlated subquery by the correlated subexpression of the equi-predicate
334 * (E1 or E2). Then, add the original predicate as a clause to the join condition of the dependent join between the
335 * statement that binds the free variables of the subexpression and the nested query that contains the predicate
336 * (potentially by multiple levels of nesting).
337 *
338 *
339 * DEPENDENT JOIN:
340 *
341 * Transforming non-equi predicates gives no benefit over a dependent join, unless we can perform *re*-aggregation (e.g.
342 * SUM of COUNTs).
343 *
344 *====================================================================================================================*/
345
347std::vector<std::reference_wrapper<const ast::FnApplicationExpr>>
349{
350 std::vector<std::reference_wrapper<const ast::FnApplicationExpr>> aggregates;
351 auto handle_fn = overloaded {
352 [](auto&) { },
353 [&aggregates](const ast::FnApplicationExpr &e) {
354 M_insist(e.has_function());
355 if (e.get_function().is_aggregate()) { // test that this is an aggregation
356 using std::find_if, std::to_string;
357 const std::string str = to_string(e);
358 auto exists = [&str](std::reference_wrapper<const ast::Expr> agg) {
359 return to_string(agg.get()) == str;
360 };
361 if (find_if(aggregates.begin(), aggregates.end(), exists) == aggregates.end()) // if not already present
362 aggregates.push_back(e);
363 throw visit_skip_subtree{}; // skip recursing into subexpressions
364 }
365 },
366 };
367
368 if (stmt.having)
369 visit(handle_fn, *as<ast::HavingClause>(*stmt.having).having, m::tag<ast::ConstPreOrderExprVisitor>());
370
371 for (auto &s : as<ast::SelectClause>(*stmt.select).select)
372 visit(handle_fn, *s.first, m::tag<ast::ConstPreOrderExprVisitor>());
373
374 if (stmt.order_by) {
375 for (auto &o : as<ast::OrderByClause>(*stmt.order_by).order_by)
376 visit(handle_fn, *o.first, m::tag<ast::ConstPreOrderExprVisitor>());
377 }
378
379 return aggregates;
380}
381
384 const std::vector<std::reference_wrapper<const ast::Expr>> components)
385{
386 auto recurse = overloaded {
387 [&](const ast::Designator &d) -> bool {
388 return d.contains_free_variables() or d.is_identifier(); // identifiers are implicitly composable, as they never refer into a table XXX do we have to check the target?
389 },
390 [&](const ast::FnApplicationExpr &e) -> bool {
391 if (not is_composable_of(*e.fn, components)) return false;
392 for (auto &arg : e.args) {
393 if (not is_composable_of(*arg, components))
394 return false;
395 }
396 return true;
397 },
398 [&](const ast::UnaryExpr &e) -> bool { return is_composable_of(*e.expr, components); },
399 [&](const ast::BinaryExpr &e) -> bool {
400 return is_composable_of(*e.lhs, components) and is_composable_of(*e.rhs, components);
401 },
402 [](auto&) -> bool { return true; },
403 };
404
405 for (auto c : components)
406 if (expr == c.get()) return true; // syntactically equivalent to a component
407 return visit(recurse, expr, m::tag<m::ast::ConstASTExprVisitor>()); // attempt to recursively compose expr
408}
409
410GraphBuilder::GraphBuilder() : graph_(std::make_unique<QueryGraph>()) { }
411
419{
420 /*----- Compute whether the clause has bound or free variables, what tables are referenced, and which nested queries
421 * occur inside the clause. -----*/
422 auto visitor = overloaded {
423 [](auto&) -> void { },
424 [this](const ast::Designator &D) {
425 binding_depth = std::max(binding_depth, D.binding_depth());
426 if (D.has_table_name())
427 data_sources.emplace(D.get_table_name());
428 },
429 [this](const ast::QueryExpr &Q) {
430 auto B = std::make_unique<GraphBuilder>();
431 (*B)(*Q.query);
432 nested_queries.emplace_back(Q, std::move(B), Q.alias());
433 data_sources.emplace(Q.alias()); // by unnesting, this QueryExpr becomes a DataSource
434 },
435 };
436 for (const auto &pred : clause)
438}
439
441{
442 ClauseInfo CI(clause);
443
444 /*----- Introduce subqueries as sources. -----*/
445 for (auto &subquery : CI.nested_queries) {
446 auto &ref = graph_->add_source(subquery.alias, subquery.builder->get());
447 named_sources_.emplace(subquery.alias, ref);
448 for (auto &[deferred_clause, deferred_CI] : subquery.builder->deferred_clauses_) {
449 M_insist(deferred_CI.binding_depth > 0, "was this clause bound in its subquery?");
450 --deferred_CI.binding_depth;
451 // TODO process all designators, redirecting free designators when they are bound; this would enable us to
452 // lift correlated claues w/o having to introduce new designators that replace parts of the AST
453 if (deferred_CI.binding_depth == 0)
454 bound_clauses_.try_emplace(std::move(deferred_clause), std::move(deferred_CI)); // it's now bound
455 else
456 deferred_clauses_.try_emplace(std::move(deferred_clause), std::move(deferred_CI)); // still not bound, deferr further
457 }
458 }
459
460 if (not needs_grouping_) { // no grouping ⇒ all clauses can trivially be decorrelated
461 if (CI.binding_depth == 0)
462 bound_clauses_.try_emplace(clause, std::move(CI));
463 else
464 deferred_clauses_.try_emplace(clause, std::move(CI));
465 return;
466 }
467
468 /* A clause with a single predicate can be an equi-clause. If it is an equi-clause, it can be decorrelated more
469 * cleverly by introducing additional grouping keys. */
470 if (CI.binding_depth != 0 and clause.size() == 1) {
471 auto pred = clause[0];
472
473 /*----- `pred` is correlated: `pred` contains both bound and free variables -----*/
474 auto binary = cast<const ast::BinaryExpr>(&pred.expr());
475 if (not binary) // not binary expression ⇒ no equi-clause
476 goto fallback;
477
478 if (binary->op().type != m::TK_EQUAL) // no equi-predicate ⇒ no equi-clause
479 goto fallback;
480
481 /*----- The clause contains a single equi-predicate. Check whether it can be decorrelated. -----*/
482 bool lhs_has_free_variables = binary->lhs->contains_free_variables();
483 bool rhs_has_free_variables = binary->rhs->contains_free_variables();
484 if (lhs_has_free_variables and binary->lhs->contains_bound_variables()) // mixed LHS
485 goto fallback;
486 if (rhs_has_free_variables and binary->rhs->contains_bound_variables()) // mixed RHS
487 goto fallback;
488 M_insist(lhs_has_free_variables != rhs_has_free_variables, "only either side may contain free variables");
489
490 /*----- The clause contains a single equi-predicate, that can be decorrelated. -----*/
491 deferred_clauses_.try_emplace(clause, std::move(CI));
492 // auto &expr_with_free_vars = lhs_has_free_variables ? *binary->lhs : *binary->rhs;
493 auto &expr_with_bound_vars = lhs_has_free_variables ? *binary->rhs : *binary->lhs;
494
495 /*----- Ensure that the *bound* part of the equi-predicate is comosable of the grouping keys. -----*/
496 if (not is_composable_of(expr_with_bound_vars, existing_grouping_keys_)) // is bound part composable?
497 additional_grouping_keys_.emplace(expr_with_bound_vars); // no ⇒ introduce new grouping key
498 return;
499 }
500
501fallback:
502 /*----- Clauses with more than one predicate are *always* non-equi clauses and must be analyzed accordingly. -----*/
503 /* This is *not* an equi-clause, since it has more than one predicate joined by disjunction.
504 * Check whether the clause is correlated, i.e. whether it contains both free and bound variables. Note, that
505 * it is not enough to check whether the predicates of this clause are correlated, as a predicate of only bound
506 * variables together with a predicate of only free variables renders the clause correlated.
507 *
508 * If the clause is correlated, check whether *all* bound variables are embedded in the grouping keys. If this
509 * is the case, we can *trivially* decorrelate the clause. Otherwise, decorrelation is not possible and we
510 * resort to dependent join. */
511
512 /*----- Check whether the clause is correlated. -----*/
513 if (CI.binding_depth == 0) {
514 bound_clauses_.try_emplace(clause, std::move(CI));
515 } else {
516 /* The clause contains free variables. Check whether the predicates are composable of the grouping keys. */
517 for (auto pred : clause) {
518 if (not is_composable_of(pred.expr(), existing_grouping_keys_)) {
519 // TODO dependent join
520 throw m::invalid_argument("the bound parts of a correlated clause cannot be composed of the "
521 "grouping keys");
522 }
523 }
524 /* All predicates of this clause are composable of the grouping keys. Therefore, the entire clause can
525 * trivially be decorrelated. */
526 deferred_clauses_.try_emplace(clause, std::move(CI));
527 }
528}
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
554bool equal(const ast::Designator &one, const ast::Designator &two) {
555 return streq(to_string(one).c_str(), to_string(two).c_str());
556}
557
559void emplace_back_unique(std::vector<std::pair<const ast::Expr*, PooledOptionalString>> &pairs,
560 const std::pair<const ast::Designator*, PooledOptionalString> &pair)
561{
562 for (auto p : pairs) {
563 if (auto d = cast<const ast::Designator>(p.first); d and equal(*d, *pair.first))
564 return;
565 }
566 pairs.emplace_back(pair);
567}
568
570void emplace_back_unique(std::vector<std::pair<const ast::Expr*, PooledOptionalString>> &pairs,
571 const std::pair<const ast::Expr*, PooledOptionalString> &pair)
572{
573 if (auto d = cast<const ast::Designator>(pair.first))
574 emplace_back_unique(pairs, std::pair<const ast::Designator*, PooledOptionalString>(d, pair.second));
575 else if (not contains(pairs, pair))
576 pairs.emplace_back(pair);
577}
578
580void emplace_back(std::vector<const ast::Expr*> &exprs, const ast::Designator *des)
581{
582 for (auto e : exprs) {
583 if (auto d = cast<const ast::Designator>(e); d and equal(*d, *des))
584 return;
585 }
586 exprs.emplace_back(des);
587}
588
590void emplace_back_unique(std::vector<const ast::Expr*> &exprs, const ast::Expr *expr)
591{
592 if (auto d = cast<const ast::Designator>(expr))
593 emplace_back(exprs, d);
594 else if (not contains(exprs, expr))
595 exprs.emplace_back(expr);
596}
597
599void emplace_back_unique(std::vector<DataSource*> &sources, DataSource *src)
600{
601 for (auto s : sources) {
602 if (s->name() == src->name())
603 return;
604 }
605 sources.emplace_back(src);
606}
607
610void insert(std::vector<const ast::Expr*> &exprs, const std::vector<const ast::Designator*> &insertions) {
611 for (auto d : insertions)
612 emplace_back(exprs, d);
613}
614
617void insert(std::vector<const ast::Expr*> &exprs, const std::vector<const ast::Expr*> &insertions) {
618 for (auto e : insertions) {
619 if (auto d = cast<const ast::Designator>(e))
620 emplace_back(exprs, d);
621 else if (not contains(exprs, e))
622 exprs.emplace_back(e);
623 }
624}
625
627struct GetNestedQueries : ast::ConstASTExprVisitor
628{
629 private:
630 std::vector<const ast::QueryExpr*> queries_;
631
632 public:
634
635 std::vector<const ast::QueryExpr*> get() { return std::move(queries_); }
636
637 using ConstASTExprVisitor::operator();
638 void operator()(Const<ast::ErrorExpr>&) override { M_unreachable("graph must not contain errors"); }
639
640 void operator()(Const<ast::Designator>&) override { /* nothing to be done */ }
641
642 void operator()(Const<ast::Constant>&) override { /* nothing to be done */ }
643
644 void operator()(Const<ast::FnApplicationExpr>&) override { /* nothing to be done */ }
645
646 void operator()(Const<ast::UnaryExpr> &e) override { (*this)(*e.expr); }
647
648 void operator()(Const<ast::BinaryExpr> &e) override { (*this)(*e.lhs); (*this)(*e.rhs); }
649
650 void operator()(Const<ast::QueryExpr> &e) override { queries_.push_back(&e); }
651};
652
654#if 0
655struct m::GetPrimaryKey
656{
657 private:
658 std::vector<const ast::Expr*> primary_keys_;
659
660 public:
661 GetPrimaryKey() { }
662
663 auto get() { return std::move(primary_keys_); }
664
665 void compute(DataSource *source) {
666 if (auto base = cast<BaseTable>(source)) {
667 Catalog &C = Catalog::Get();
668 Position pos(C.pool("Decorrelation"));
669 ast::Token dot(pos, C.pool("."), TK_DOT);
670 ast::Token tbl(pos, base->name(), TK_IDENTIFIER);
671 for (auto pkey : base->table().primary_key()) {
672 ast::Token attr(pos, pkey->name, TK_IDENTIFIER);
673 auto D = new ast::Designator(dot, tbl, attr, pkey->type, pkey);
674 primary_keys_.push_back(D);
675 // base->expansion_.push_back(D);
676 }
677 } else if (auto query = cast<Query>(source)) {
678 auto &graph = query->query_graph();
679 auto former_size = primary_keys_.size();
680 if (graph.grouping()) {
681 /* Grouping keys form a new primary key. */
682 for (auto e : graph.group_by()) {
683 Catalog &C = Catalog::Get();
684 Position pos(C.pool("Decorrelation"));
685 ast::Token dot(pos, C.pool("."), TK_DOT);
686 ast::Token tbl(pos, query->name(), TK_IDENTIFIER);
687 std::ostringstream oss;
688 oss << e;
689 ast::Token attr(pos, C.pool(oss.str().c_str()), TK_IDENTIFIER);
690 auto D = new ast::Designator(dot, tbl, attr, as<const PrimitiveType>(e.get().type())->as_scalar(), e);
691 primary_keys_.push_back(D);
692 }
693 } else {
694 /* Primary keys of all sources for the primary key. */
695 for (auto ds : graph.sources())
696 compute(ds);
697 }
698 /* Provide all newly inserted primary keys. */
699 if (not graph.projections().empty()) {
700 while (former_size != primary_keys_.size()) {
701 emplace_back_unique(graph.projections_,
702 std::pair<const ast::Expr *, const char *>(primary_keys_.at(former_size), nullptr));
703 ++former_size;
704 }
705 }
706 } else {
707 M_unreachable("invalid variant");
708 }
709 }
710};
711#endif
712
715 // GetPrimaryKey GPK; // TODO we could write this much denser if we had a recursive Expr visitor
716 // GPK.compute(source);
717 // return GPK.get();
718}
719
721#if 0
722struct GetDesignators : ast::ConstASTExprVisitor
723{
724 private:
725 std::vector<const ast::Expr*> designators_;
726 bool aggregates_ = true;
727
728 public:
729 GetDesignators() { }
730
731 std::vector<const ast::Expr*> get() { return std::move(designators_); }
732
733 void aggregates(bool aggregates) { aggregates_ = aggregates; }
734
735 using ConstASTExprVisitor::operator();
736 void operator()(Const<ast::ErrorExpr>&) override { M_unreachable("graph must not contain errors"); }
737
738 void operator()(Const<ast::Designator> &e) override { designators_.push_back(&e); }
739
740 void operator()(Const<ast::Constant>&) override { /* nothing to be done */ }
741
742 void operator()(Const<ast::FnApplicationExpr> &e) override {
743 if (aggregates_)
744 designators_.push_back(&e);
745 else {
746 for (auto &arg : e.args)
747 (*this)(*arg);
748 }
749 }
750
751 void operator()(Const<ast::UnaryExpr> &e) override { (*this)(*e.expr); }
752
753 void operator()(Const<ast::BinaryExpr> &e) override { (*this)(*e.lhs); (*this)(*e.rhs); }
754
755 void operator()(Const<ast::QueryExpr>&) override { /* nothing to be done */ }
756};
757#endif
758
761#if 0
762auto get_needed_attrs(const QueryGraph *graph, const Query &query)
763{
764 M_insist(contains(graph->sources(), &query));
765
766 GetDesignators GD; // TODO we could write this much denser if we had a recursive Expr visitor
767 auto it = std::find(graph->sources().begin(), graph->sources().end(), &query);
768 for (auto &c : (*it)->filter()) {
769 for (auto &p : c)
770 GD(*p.expr());
771 }
772 for (auto &j : (*it)->joins()) {
773 for (auto &c : j->condition()) {
774 for (auto &p : c)
775 GD(*p.expr());
776 }
777 }
778 if (graph->grouping()) {
779 for (auto &e : graph->group_by())
780 GD(*e);
781 GD.aggregates(false); // to search for needed attributes in the arguments of aggregates
782 for (auto &e : graph->aggregates())
783 GD(*e);
784 } else {
785 for (auto &e : graph->projections())
786 GD(*e.first);
787 }
788 return GD.get();
789}
790#endif
791
793#if 0
794struct ReplaceDesignators : ast::ConstASTExprVisitor
795{
796 private:
797 const std::vector<const ast::Expr*> *origin_;
798 ast::Designator *new_designator_ = nullptr;
799 std::vector<const ast::Expr*> deletions_;
800
801 public:
802 ReplaceDesignators() { }
803
808 std::vector<const ast::Expr*> replace(cnf::CNF &cnf, const std::vector<const ast::Expr*> *origin,
809 bool target_deleted = false)
810 {
811 origin_ = origin;
812 deletions_.clear();
813 cnf::CNF cnf_new;
814 for (auto &c : cnf) {
815 cnf::Clause c_new;
816 for (auto &p : c) {
817 auto e = p.expr();
818 (*this)(*e);
819 if (cast<const ast::Designator>(e) and new_designator_) {
820 /* `e` has to be replaced by `new_designator_`. `e` has to be deleted manually iff the target is
821 * deleted (since it is removed from `target`) and `new_designator` has to be deleted manually iff the
822 * target is not deleted. */
823 c_new = c_new or cnf::Clause({cnf::Predicate::Create(new_designator_, p.negative())});
824 if (target_deleted)
825 deletions_.push_back(e);
826 else
827 deletions_.push_back(new_designator_);
828 new_designator_ = nullptr;
829 } else {
830 c_new = c_new or cnf::Clause({p});
831 }
832 }
833 cnf_new = cnf_new and cnf::CNF({c_new});
834 }
835 cnf = std::move(cnf_new);
836 return std::move(deletions_);
837 }
838
843 std::vector<const ast::Expr*> replace(std::vector<const ast::Expr*> &target, const std::vector<const ast::Expr*> *origin,
844 bool target_deleted = false) {
845 origin_ = origin;
846 deletions_.clear();
847 std::vector<const ast::Expr*> target_new;
848 for (auto e : target) {
849 (*this)(*e);
850 if (cast<const ast::Designator>(e) and new_designator_) {
851 /* `e` has to be replaced by `new_designator_`. `e` has to be deleted manually iff the target is
852 * deleted (since it is removed from `target`) and `new_designator` has to be deleted manually iff the
853 * target is not deleted. */
854 target_new.emplace_back(new_designator_);
855 if (target_deleted)
856 deletions_.push_back(e);
857 else
858 deletions_.push_back(new_designator_);
859 new_designator_ = nullptr;
860 } else {
861 target_new.emplace_back(e);
862 }
863 }
864 target = std::move(target_new);
865 return std::move(deletions_);
866 }
867
872 std::vector<const ast::Expr*> replace(std::vector<std::pair<const ast::Expr*, const char*>> &target,
873 const std::vector<const ast::Expr*> *origin, bool target_deleted = false)
874 {
875 origin_ = origin;
876 deletions_.clear();
877 std::vector<std::pair<const ast::Expr*, const char*>> target_new;
878 for (auto e : target) {
879 (*this)(*e.first);
880 if (cast<const ast::Designator>(e.first) and new_designator_) {
881 /* `e.first` has to be replaced by `new_designator_`. `e.first` has to be deleted manually iff the
882 * target is deleted (since it is removed from `target`) and `new_designator` has to be deleted manually
883 * iff the target is not deleted. */
884 target_new.emplace_back(new_designator_, e.second);
885 if (target_deleted)
886 deletions_.push_back(e.first);
887 else
888 deletions_.push_back(new_designator_);
889 new_designator_ = nullptr;
890 } else {
891 target_new.emplace_back(e);
892 }
893 }
894 target = std::move(target_new);
895 return std::move(deletions_);
896 }
897
898 private:
899 using ConstASTExprVisitor::operator();
900 void operator()(Const<ast::ErrorExpr>&) override { M_unreachable("graph must not contain errors"); }
901
902 void operator()(Const<ast::Designator> &e) override {
903 if (auto d = findDesignator(e))
904 new_designator_ = make_designator(d);
905 }
906
907 void operator()(Const<ast::Constant>&) override { /* nothing to be done */ }
908
909 void operator()(Const<ast::FnApplicationExpr> &e) override {
910 std::vector<std::unique_ptr<ast::Expr>> args_new;
911 for (auto &arg : e.args) {
912 (*this)(*arg);
913 if (new_designator_) {
914 /* Replace `arg` by the newly generated designator. */
915 deletions_.push_back(arg.get());
916 args_new.emplace_back(new_designator_);
917 new_designator_ = nullptr;
918 } else {
919 args_new.emplace_back(arg.get());
920 }
921 }
922 const_cast<ast::FnApplicationExpr*>(&e)->args = std::move(args_new);
923 }
924
925 void operator()(Const<ast::UnaryExpr> &e) override {
926 (*this)(*e.expr);
927 if (new_designator_) {
928 /* Replace `expr` by the newly generated designator. */
929 deletions_.push_back(e.expr.get());
930 const_cast<ast::UnaryExpr*>(&e)->expr = std::unique_ptr<ast::Designator>(new_designator_);
931 new_designator_ = nullptr;
932 }
933 }
934
935 void operator()(Const<ast::BinaryExpr> &e) override {
936 (*this)(*e.lhs);
937 if (new_designator_) {
938 /* Replace `expr` by the newly generated designator. */
939 deletions_.push_back(e.lhs.get());
940 const_cast<ast::BinaryExpr*>(&e)->lhs = std::unique_ptr<ast::Designator>(new_designator_);
941 new_designator_ = nullptr;
942 }
943 (*this)(*e.rhs);
944 if (new_designator_) {
945 /* Replace `expr` by the newly generated designator. */
946 deletions_.push_back(e.rhs.get());
947 const_cast<ast::BinaryExpr*>(&e)->rhs = std::unique_ptr<ast::Designator>(new_designator_);
948 new_designator_ = nullptr;
949 }
950 }
951
952 void operator()(Const<ast::QueryExpr>&) override { /* nothing to be done */ }
953
955 const ast::Designator * findDesignator(const ast::Designator &des) {
956 for (auto e : *origin_) {
957 if (auto d = cast<const ast::Designator>(e)) {
958 if (equal(*d, des))
959 return d;
960 }
961 }
962 return nullptr;
963 }
964
967 ast::Designator * make_designator(const ast::Designator *d) {
968 Catalog &C = Catalog::Get();
969 Position pos(C.pool("Decorrelation"));
970 ast::Token dot(pos, C.pool("."), TK_DOT);
971 std::ostringstream oss;
972 oss << *d;
973 ast::Token attr(pos, C.pool(oss.str().c_str()), TK_IDENTIFIER);
974 auto D = new ast::Designator(dot, ast::Token(), attr, as<const PrimitiveType>(d->type())->as_scalar(), d);
975 return D;
976 }
977};
978#endif
979
981#if 0
982struct m::GetCorrelationInfo : ast::ConstASTExprVisitor
983{
984 friend struct Decorrelation;
985
987 struct CorrelationInfo : ast::ASTExprVisitor
988 {
989 cnf::Clause clause;
990 std::set<const ast::Expr*> uncorrelated_exprs;
991 std::set<const char*> sources;
992 private:
993 const char *table_name_;
994 std::unordered_map<const ast::Designator*, std::unique_ptr<ast::Designator>> old_to_new_outer_;
995 std::unordered_map<const ast::Designator*, std::unique_ptr<ast::Designator>> old_to_new_inner_;
996 std::unordered_set<const ast::Designator*> replaced_designators_;
997 bool is_decorrelation_finished_ = false;
998 // TODO move the post-decorrelation processing into a separate class
999
1000 public:
1001 CorrelationInfo(cnf::Clause clause) : clause(std::move(clause)) { }
1002
1007 void replace(std::unordered_map<const ast::Designator*, std::unique_ptr<ast::Designator>> &old_to_new,
1008 const char *table_name, std::vector<const ast::Expr*> &expansion) {
1009 table_name_ = table_name;
1010 old_to_new_outer_ = std::move(old_to_new);
1011 /* Update `clause`. */
1012 for (auto pred : clause)
1013 (*this)(*pred);
1014 /* Update `uncorrelated_exprs`. */
1015 std::set<const ast::Expr*> uncorrelated_exprs_new;
1016 for (auto e : uncorrelated_exprs) {
1017 if (auto d = cast<const ast::Designator>(e)) {
1018 auto d_new = get_new(d);
1019 try {
1020 uncorrelated_exprs_new.emplace(old_to_new_inner_.at(d_new));
1021 /* Occurrence of `d` was possibly replaced by newly created designator and not by `d_new`.
1022 * Therefore, add `d_new` to `replaced_designators` to ensure deletion by caller. */
1023 replaced_designators_.emplace(d_new);
1024 } catch (std::out_of_range&) {
1025 /* `d_new` was not found in `old_to_new_inner` since no table name was specified and no new
1026 * designator was created by this function. Add `d_new` as new uncorrelated expression. */
1027 M_insist(not table_name_);
1028 uncorrelated_exprs_new.emplace(d_new);
1029 }
1030 } else {
1031 uncorrelated_exprs_new.emplace(e);
1032 }
1033 }
1034 uncorrelated_exprs = std::move(uncorrelated_exprs_new);
1035 old_to_new_inner_.clear();
1036 /* Add replaced designators to expansion list. */
1037 for (auto d : replaced_designators_)
1038 expansion.push_back(d);
1039 replaced_designators_.clear();
1040 }
1041
1043 void finish_decorrelation() {
1044 is_decorrelation_finished_ = true;
1045 for (auto &p : clause)
1046 (*this)(*p);
1047 }
1048
1049 private:
1050 using ASTExprVisitor::operator();
1051 void operator()(Const<ast::ErrorExpr>&) override { M_unreachable("graph must not contain errors"); }
1052
1053 void operator()(Const<ast::Designator> &d) override { if (is_decorrelation_finished_) d.set_bound(); }
1054
1055 void operator()(Const<ast::Constant>&) override { /* nothing to be done */ }
1056
1057 void operator()(Const<ast::FnApplicationExpr>&) override { /* nothing to be done */ }
1058
1059 void operator()(Const<ast::UnaryExpr> &e) override {
1060 if (is_decorrelation_finished_) {
1061 (*this)(*e.expr);
1062 } else {
1063 if (auto d = cast<ast::Designator>(e.expr.get()); d and contains(uncorrelated_exprs, d)) {
1064 /* Replace this designator by an equivalent new one. Set table name iff present. */
1065 e.expr = std::unique_ptr<ast::Expr>(table_name_ ? set_table_name(get_new(d)) : get_new(d));
1066 replaced_designators_.emplace(d);
1067 } else {
1068 (*this)(*e.expr);
1069 }
1070 }
1071 }
1072
1073 void operator()(Const<ast::BinaryExpr> &e) override {
1074 if (is_decorrelation_finished_) {
1075 (*this)(*e.lhs);
1076 (*this)(*e.rhs);
1077 } else {
1078 if (auto d = cast<ast::Designator>(e.lhs.get()); d and contains(uncorrelated_exprs, d)) {
1079 /* Replace this designator by an equivalent new one. Set table name iff present. */
1080 e.lhs = std::unique_ptr<ast::Expr>(table_name_ ? set_table_name(get_new(d)) : get_new(d));
1081 replaced_designators_.emplace(d);
1082 } else {
1083 (*this)(*e.lhs);
1084 }
1085 if (auto d = cast<ast::Designator>(e.rhs.get()); d and contains(uncorrelated_exprs, d)) {
1086 /* Replace this designator by an equivalent new one. Set table name iff present. */
1087 e.rhs = std::unique_ptr<ast::Expr>(table_name_ ? set_table_name(get_new(d)) : get_new(d));
1088 replaced_designators_.emplace(d);
1089 } else {
1090 (*this)(*e.rhs);
1091 }
1092 }
1093 }
1094
1095 void operator()(Const<ast::QueryExpr>&) override { /* nothing to be done */ }
1096
1097 ast::Designator * get_new(ast::Designator *d) {
1098 if (auto it = old_to_new_outer_.find(d); it != old_to_new_outer_.end())
1099 return it->second;
1100 else
1101 return d;
1102 }
1103 const ast::Designator * get_new(const ast::Designator *d) {
1104 if (auto it = old_to_new_outer_.find(d); it != old_to_new_outer_.end())
1105 return it->second;
1106 else
1107 return d;
1108 }
1109
1110 ast::Designator * set_table_name(const ast::Designator *d) {
1111 Catalog &C = Catalog::Get();
1112 Position pos(C.pool("Decorrelation"));
1113 ast::Token dot(pos, C.pool("."), TK_DOT);
1114 ast::Token tbl(pos, C.pool(table_name_), TK_IDENTIFIER);
1115 auto D = new ast::Designator(dot, tbl, d->attr_name, d->type(), d);
1116 old_to_new_inner_.emplace(d, D);
1117 return D;
1118 }
1119 };
1120
1121 private:
1122 std::vector<CorrelationInfo*> equi_;
1123 std::vector<CorrelationInfo*> non_equi_;
1124
1125 CorrelationInfo *info_;
1126
1127 std::vector<const ast::Expr*> expansion_;
1128
1129 public:
1130 GetCorrelationInfo() { }
1131 ~GetCorrelationInfo() {
1132 for (auto d : expansion_)
1133 delete d;
1134 }
1135
1138 cnf::CNF compute(const cnf::CNF &cnf) {
1139 cnf::CNF res;
1140 for (auto &c : cnf) { // iterate over a single clause (de facto)
1141 info_ = new CorrelationInfo(c); // store reference to the *currently built* CorrelationInfo
1142 bool is_equi = c.size() <= 1; // the clause can only be an equi-predicate if there is no disjunction
1143 for (auto &p : c) { // fill `info_` for each predicate of clause `c`
1144 (*this)(*p); // visitor GetCorrelationInfo(p), fills the current `info_` with data
1145 if (p.negative())
1146 is_equi = is_equi and is_not_equal(&p.expr());
1147 else
1148 is_equi = is_equi and is_equal(&p.expr());
1149 }
1150 if (info_->sources.empty()) {
1151 /* c is not correlated. */
1152 res = res and cnf::CNF({c});
1153 delete info_;
1154 } else {
1155 /* c is correlated. */
1156 if (is_equi)
1157 equi_.push_back(info_);
1158 else
1159 non_equi_.push_back(info_);
1160 }
1161 }
1162 return res;
1163 }
1164
1166 std::vector<CorrelationInfo*> getEqui() { return std::move(equi_); }
1168 std::vector<CorrelationInfo*> getNonEqui() { return std::move(non_equi_); }
1169
1170 bool empty() { return equi_.empty() and non_equi_.empty(); }
1171 bool non_equi() { return not non_equi_.empty(); }
1172
1173 private:
1174 using ConstASTExprVisitor::operator();
1175 void operator()(Const<ast::ErrorExpr>&) override { M_unreachable("graph must not contain errors"); }
1176
1177 void operator()(Const<ast::Designator> &e) override {
1178 if (e.contains_free_variables()) {
1179 auto names = find_table_name(e);
1180 info_->sources.insert(names.begin(), names.end());
1181 } else {
1182 info_->uncorrelated_exprs.emplace(&e);
1183 }
1184 }
1185
1186 void operator()(Const<ast::Constant>&) override { /* nothing to be done */ }
1187
1188 void operator()(Const<ast::FnApplicationExpr> &e) override { info_->uncorrelated_exprs.emplace(&e); }
1189
1190 void operator()(Const<ast::UnaryExpr> &e) override { (*this)(*e.expr); }
1191
1192 void operator()(Const<ast::BinaryExpr> &e) override { (*this)(*e.lhs); (*this)(*e.rhs); }
1193
1194 void operator()(Const<ast::QueryExpr>&) override { /* nothing to be done */ }
1195
1196 bool is_equal(const ast::Expr *e) {
1197 if (auto b = cast<const ast::BinaryExpr>(e))
1198 return b->op() == TK_EQUAL;
1199 else
1200 return true;
1201 }
1202
1203 bool is_not_equal(const ast::Expr *e) {
1204 if (auto b = cast<const ast::BinaryExpr>(e))
1205 return b->op() == TK_BANG_EQUAL;
1206 else
1207 return true;
1208 }
1209
1210 std::set<const char*> find_table_name(const ast::Designator &d) {
1211 if (d.has_table_name()) return {d.get_table_name()};
1212 if (std::holds_alternative<const Attribute*>(d.target())) {
1213 return {std::get<const Attribute *>(d.target())->table.name};
1214 } else if (std::holds_alternative<const ast::Expr*>(d.target())) {
1215 GetDesignators GD;
1216 GD(*std::get<const ast::Expr*>(d.target()));
1217 std::set<const char*> res;
1218 for (auto des : GD.get()) {
1219 /* Skip `FnApplicationExpr`s because they have no table name. */
1220 if (auto d = cast<const ast::Designator>(des)) {
1221 auto names = find_table_name(*d);
1222 res.insert(names.begin(), names.end());
1223 }
1224 }
1225 return res;
1226 } else {
1227 M_unreachable("target can not be `std::monostate`");
1228 }
1229 }
1230};
1231#endif
1232
1234#if 0
1235struct m::Decorrelation
1236{
1237 private:
1239 using graphs_t = std::pair<QueryGraph*, Query*>;
1240
1242 Query &query_;
1244 QueryGraph *graph_;
1245
1246 std::vector<graphs_t> correlated_nested_queries_;
1247 std::vector<const ast::Expr*> needed_attrs_;
1248
1249 public:
1250 Decorrelation(Query &query, QueryGraph *graph)
1251 : query_(query)
1252 , graph_(graph)
1253 , correlated_nested_queries_({{graph, nullptr}})
1254 { }
1255
1258 bool decorrelate() {
1259 /* Find the innermost, correlated graph and save the path in `graphs_`. */
1260 find_correlated_nested_graphs(query_.query_graph(), &query_);
1261
1262 /* Push-up all equi-predicates (starting with the innermost query) until the first non-equi-predicate below a
1263 * grouping is reached. */
1264 while (correlated_nested_queries_.size() > 1) {
1265 auto last = correlated_nested_queries_.back();
1266 if (last.first->correlation_info_->non_equi() and last.first->grouping()) break;
1267 correlated_nested_queries_.pop_back();
1268 push_predicates(correlated_nested_queries_.back().first, last.second);
1269 }
1270
1271 M_insist(not correlated_nested_queries_.empty());
1272
1273 if (correlated_nested_queries_.size() > 1) {
1274 /* Initialize `needed_attrs_` for operator push-up. */
1275 needed_attrs_ = get_needed_attrs(graph_, query_);
1276 /* Provide all table names explicitly because the attribute name does not have to be unique anymore. */
1277 for (auto des : needed_attrs_) {
1278 /* Skip `FnApplicationExpr`s because they have no table name. */
1279 if (auto d = cast<const ast::Designator>(des); d and not d->has_explicit_table_name())
1280 const_cast<ast::Designator*>(d)->table_name.type = TK_IDENTIFIER;
1281 }
1282
1283 /* Push-up all operators until the innermost non-equi-predicate is reached. */
1284 for (auto it = correlated_nested_queries_.begin(); it != std::prev(correlated_nested_queries_.end());) {
1285 auto &upper = *it;
1286 ++it;
1287 auto &lower = *it;
1288 pushOperators(lower.first, upper.first, lower.second);
1289 }
1290
1291 /* Adapt all designators above the innermost non-equi-predicate. */
1292 auto *origin = &correlated_nested_queries_.back().first->group_by_;
1293 M_insist(not origin->empty());
1294 for (auto i = correlated_nested_queries_.size() - 1; i != 0; --i) {
1295 auto &upper = correlated_nested_queries_[i - 1];
1296 M_insist(not origin->empty());
1297 replace_designators(upper.first, origin);
1298 if (upper.first->grouping())
1299 origin = &upper.first->group_by_;
1300 }
1301 }
1302
1303 auto iterator = correlated_nested_queries_.end(); // iterator to the innermost graph which is not completely decorrelated
1304 for (auto it = correlated_nested_queries_.begin(); it != correlated_nested_queries_.end(); ++it) {
1305 if (not (*it).first->correlation_info_->empty()) {
1306 /* In this graph not all correlated predicates are decorrelated. */
1307 iterator = it;
1308 }
1309 }
1310 if (iterator == correlated_nested_queries_.end()) {
1311 /* All correlated predicates are decorrelated. */
1312 return true;
1313 } else {
1314 /* Not all correlated predicates are decorrelated. Unset `decorrelated_`-flag of all queries until the
1315 * innermost not decorrelated predicate. */
1316 for (auto it = correlated_nested_queries_.begin(); it != iterator;)
1317 (*++it).second->decorrelated_ = false;
1318 return false;
1319 }
1320 }
1321
1322 private:
1325 void find_correlated_nested_graphs(QueryGraph *graph, Query *query) {
1326 correlated_nested_queries_.emplace_back(graph, query);
1327
1328 bool found = false;
1329 for (auto ds : graph->sources()) {
1330 if (ds->decorrelated_) continue; // already decorrelated, nothing to be done
1331 M_insist(not found, "Multiple not yet decorrelated nested queries in one graph should not be possible.");
1332 found = true;
1333 auto q = as<Query>(ds);
1334 find_correlated_nested_graphs(q->query_graph(), q);
1335 }
1336 }
1337
1342 void push_predicates(QueryGraph *outer_graph, Query *query) {
1343 QueryGraph *inner_graph = query->query_graph();
1344 M_insist(contains(outer_graph->sources(), query));
1345
1346 const bool inner_has_projection = not inner_graph->projections().empty();
1347 auto grouping = inner_graph->grouping();
1348 auto equi = inner_graph->correlation_info_->getEqui();
1349 auto non_equi = inner_graph->correlation_info_->getNonEqui();
1350 inner_graph->correlation_info_->equi_.clear();
1351 inner_graph->correlation_info_->non_equi_.clear();
1352 bool is_equi = true;
1353 M_insist(non_equi.empty() or not grouping);
1354 for (auto it = equi.begin(); it != non_equi.end(); ++it) {
1355 if (it == equi.end()) {
1356 it = non_equi.begin(); // to iterate over equi and non_equi
1357 is_equi = false;
1358 if (it == non_equi.end()) break; // to skip empty non_equi
1359 }
1360 auto correlation_info = *it;
1361 /* Adapt projection and grouping. */
1362 std::unordered_map<const ast::Designator*, std::unique_ptr<ast::Designator>> old_to_new;
1363 for (auto e : correlation_info->uncorrelated_exprs) {
1364 const ast::Expr *e_new = e;
1365 if (grouping) { // ensure that `e` is maintained through grouping
1366 emplace_back_unique(inner_graph->group_by_, e); // the `inner_graph` must group by `e`
1367 /*----- Create name for projecting `e`. -----*/
1368 if (auto d = cast<const ast::Designator>(e)) {
1369 /* Create a new designator pointing to `d` (the grouping key) and add respective mapping. */
1370 Catalog &C = Catalog::Get();
1371 std::ostringstream oss;
1372 oss << *d;
1373 Position pos(C.pool("Decorrelation"));
1374 ast::Token attr(pos, C.pool(oss.str().c_str()), TK_IDENTIFIER);
1375 auto d_new = std::make_unique<ast::Designator>(attr, ast::Token(), attr, as<const PrimitiveType>(d->type())->as_scalar(), d);
1376 auto res = old_to_new.emplace(d, std::move(d_new));
1377 e_new = res.first->second.get();
1378 }
1379 }
1380 if (inner_has_projection)
1382 inner_graph->projections_,
1383 std::pair<const ast::Expr*, const char*>(e_new, /* no alias */ nullptr)
1384 );
1385 }
1386 /* Update `correlation_info->uncorrelated_expr` and `correlation_info->clause` by replacing the table name
1387 * of all uncorrelated designators by the alias of `query` (if given) because it will be pushed up and
1388 * therefore the former sources are only reachable via `query`. */
1389 correlation_info->replace(old_to_new, query->alias(), inner_graph->correlation_info_->expansion_);
1390 /* Search needed sources. */
1391 std::vector<DataSource*> sources{query};
1392 auto all_reachable = true;
1393 for (auto s : correlation_info->sources) {
1394 if (auto src = findSource(outer_graph->sources(), s)) {
1395 emplace_back_unique(sources, src);
1396 } else {
1397 /* The source s is not reachable. */
1398 all_reachable = false;
1399 break;
1400 }
1401 }
1402 /* Add join or rather filter. */
1403 if (all_reachable) {
1404 correlation_info->finish_decorrelation();
1405 if (auto j = findJoin(outer_graph->joins(), sources)) {
1406 /* A join with the same data sources already exists so only update the predicate. */
1407 j->update_condition(cnf::CNF({correlation_info->clause}));
1408 } else {
1409 /* Create a new join for the data sources. */
1410 auto J = outer_graph->joins_.emplace_back(new Join(cnf::CNF({correlation_info->clause}), sources));
1411 for (auto ds : J->sources())
1412 ds->add_join(J);
1413 }
1414 delete correlation_info;
1415 } else {
1416 /* Add `i` to the upper correlation information. */
1417 if (is_equi)
1418 outer_graph->correlation_info_->equi_.push_back(correlation_info);
1419 else
1420 outer_graph->correlation_info_->non_equi_.push_back(correlation_info);
1421 }
1422 }
1423 query->decorrelated_ = true;
1424 }
1425
1432 void pushOperators(QueryGraph *lower, QueryGraph *upper, Query *query) {
1433 M_insist(query->query_graph() == lower);
1434 M_insist(contains(upper->sources(), query));
1435 auto lower_sources = lower->sources();
1436
1437 /* Remove joins of `query` and add conditions as filter. */
1438 for (auto j : query->joins()) {
1439 query->update_filter(j->condition());
1440 upper->remove_join(j);
1441 for (auto ds : j->sources())
1442 ds->remove_join(j);
1443 delete j;
1444 }
1445 auto grouping = lower->grouping();
1446 /* Move all sources except `query` from `upper` to `lower` and adapt grouping of `lower`.
1447 * Change source id such that the id's of all sources of `lower` are sequential. */
1448 size_t num_sources = lower->sources().size();
1449 for (auto it = upper->sources_.begin(); it != upper->sources_.end(); ++it) {
1450 if (*it == query) continue;
1451 (*it)->id_ = num_sources++;
1452 lower->sources_.emplace_back(*it);
1453 if (grouping) {
1454 auto primary_key = get_primary_key(*it);
1455 M_insist(not primary_key.empty(), "can not push-up grouping above source without primary key");
1456 insert(lower->group_by_, primary_key);
1457 }
1458 }
1459 query->id_ = 0;
1460 upper->sources_ = {query};
1461 /* Move all joins of `upper` to `lower`. */
1462 lower->joins_.insert(lower->joins_.end(), upper->joins_.begin(), upper->joins_.end());
1463 upper->joins_.clear();
1464 /* Add all later needed attributes (of `graph_`) to grouping and projection of `lower`. */
1465 if (grouping)
1466 insert(lower->group_by_, needed_attrs_);
1467 if (not lower->projections().empty()) {
1468 for (auto attr : needed_attrs_)
1469 emplace_back_unique(lower->projections_, std::pair<const ast::Expr*, const char*>(attr, nullptr));
1470 }
1471 /* Add joins for correlated predicates iff possible. */
1472 auto equi = lower->correlation_info_->getEqui();
1473 auto non_equi = lower->correlation_info_->getNonEqui();
1474 lower->correlation_info_->equi_.clear();
1475 lower->correlation_info_->non_equi_.clear();
1476 bool is_equi = true;
1477 for (auto it = equi.begin(); it != non_equi.end(); ++it) {
1478 if (it == equi.end()) {
1479 it = non_equi.begin(); // to iterate over equi and non_equi
1480 is_equi = false;
1481 if (it == non_equi.end()) break; // to skip empty non_equi
1482 }
1483 auto i = *it;
1484 /* Search needed sources. Since `lower` could initially contain multiple sources which contribute to this
1485 * join we have to check the table names of uncorrelated designators, too. */
1486 std::vector<DataSource*> sources;
1487 auto all_reachable = true;
1488 for (auto e : i->uncorrelated_exprs) {
1489 if (auto d = cast<const ast::Designator>(e)) {
1490 if (auto src = findSource(lower->sources(), d->get_table_name())) {
1491 emplace_back_unique(sources, src);
1492 } else {
1493 /* The source d->get_table_name() is not reachable. */
1494 all_reachable = false;
1495 break;
1496 }
1497 } else {
1498 /* Aggregation can not be assigned to a specific source so add all initially available ones. */
1499 sources = lower_sources;
1500 break;
1501 }
1502 }
1503 for (auto s : i->sources) {
1504 if (auto src = findSource(lower->sources(), s)) {
1505 emplace_back_unique(sources, src);
1506 } else {
1507 /* The source s is not reachable. */
1508 all_reachable = false;
1509 break;
1510 }
1511 }
1512 /* Add join. */
1513 if (all_reachable) {
1514 i->finish_decorrelation();
1515 if (auto j = findJoin(lower->joins(), sources)) {
1516 /* A join with the same data sources already exists so only update the predicate. */
1517 j->update_condition(cnf::CNF({i->clause}));
1518 } else {
1519 /* Create a new join for the data sources. */
1520 auto J = lower->joins_.emplace_back(new Join(cnf::CNF({i->clause}), sources));
1521 for (auto ds : J->sources())
1522 ds->add_join(J);
1523 }
1524 delete i;
1525 } else {
1526 /* Add `i` again to the correlation information. */
1527 if (is_equi)
1528 lower->correlation_info_->equi_.push_back(i);
1529 else
1530 lower->correlation_info_->non_equi_.push_back(i);
1531 }
1532 }
1533 }
1534
1537 DataSource * findSource(const std::vector<DataSource*> &sources, const char *name) {
1538 for (auto s : sources) {
1539 if (auto q = cast<Query>(s); name == s->name() or (q and findSource(q->query_graph()->sources(), name)))
1540 return s;
1541 }
1542 return nullptr;
1543 }
1544
1547 Join * findJoin(const std::vector<Join*> &joins, const std::vector<DataSource*> &sources) {
1548 std::unordered_set<DataSource*> cmp(sources.begin(), sources.end());
1549 for (auto j : joins) {
1550 if (cmp == std::unordered_set<DataSource*>(j->sources().begin(), j->sources().end()))
1551 return j;
1552 }
1553 return nullptr;
1554 }
1555
1557 void replace_designators(QueryGraph *graph, const std::vector<const ast::Expr*> *origin) {
1558 M_insist(graph->sources().size() == 1);
1559 ReplaceDesignators RP;
1560 auto ds = *graph->sources().begin();
1561 auto filter_new = ds->filter();
1562 auto expansions = RP.replace(filter_new, origin);
1563 ds->filter_ = filter_new;
1564 graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1565 if (graph->grouping()) {
1566 expansions = RP.replace(graph->group_by_, origin);
1567 graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1568 expansions = RP.replace(graph->aggregates_, origin);
1569 graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1570 expansions = RP.replace(graph->projections_, &graph->group_by());
1571 graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1572 } else {
1573 expansions = RP.replace(graph->projections_, origin);
1574 graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1575 }
1576 }
1577};
1578#endif
1579
1580/*======================================================================================================================
1581 * GraphBuilder
1582 *
1583 * An AST Visitor that constructs the query graph.
1584 *====================================================================================================================*/
1585
1586
1588 Catalog &C = Catalog::Get();
1589
1590 /*----- Collect all aggregates in the statement and save them in the graph. -----*/
1591 graph_->aggregates_ = get_aggregates(stmt);
1592
1593 /*----- Compute whether the query needs grouping. -----*/
1594 needs_grouping_ = bool(stmt.group_by) or graph_->aggregates().size() > 0;
1595
1596 /*----- Collect grouping keys of `stmt`. -----*/
1597 if (stmt.group_by) {
1598 auto &group_by = as<const ast::GroupByClause>(*stmt.group_by);
1599 for (auto &[grp, alias] : group_by.group_by)
1600 existing_grouping_keys_.emplace_back(*grp); // TODO correct?
1601 }
1602
1604 std::vector<std::pair<std::reference_wrapper<const ast::QueryExpr>, ThreadSafePooledOptionalString>> nested_queries_in_select;
1605
1606 /*----- Process FROM and create data sources. -----*/
1607 if (stmt.from) {
1608 auto &FROM = as<ast::FromClause>(*stmt.from);
1609 for (auto &tbl : FROM.from) {
1610 if (auto tok = std::get_if<ast::Token>(&tbl.source)) {
1611 /* Create a new base table. */
1612 M_insist(tbl.has_table());
1613 auto &base = graph_->add_source(tbl.alias ? tbl.alias.text : ThreadSafePooledOptionalString{}, tbl.table());
1614 named_sources_.emplace(base.name(), base);
1615 } else if (auto stmt = std::get_if<ast::Stmt*>(&tbl.source)) {
1616 M_insist(tbl.alias.text.has_value(), "every nested statement requires an alias");
1617 auto &select = as<ast::SelectStmt>(**stmt);
1618 /* Create a graph for the sub query. */
1619 auto graph = QueryGraph::Build(select);
1620 auto &Q = graph_->add_source(tbl.alias.text, std::move(graph));
1621 M_insist(tbl.alias);
1622 named_sources_.emplace(tbl.alias.text, Q);
1623 } else {
1624 M_unreachable("invalid variant");
1625 }
1626 }
1627 }
1628
1629 /* Do some computation before first nested query is decorrelated to ensure that `get_needed_attrs()` works as
1630 * intended: */
1631
1632 /*----- Process GROUP BY and collect grouping keys. -----*/
1633 if (stmt.group_by) {
1634 auto &GROUP_BY = as<ast::GroupByClause>(*stmt.group_by);
1635 for (auto &[grp, alias] : GROUP_BY.group_by)
1636 graph_->group_by_.emplace_back(*grp, alias.text);
1637 }
1638
1639 /*----- Process SELECT and collect projections. -----*/
1640 auto SELECT = as<ast::SelectClause>(stmt.select.get());
1641 for (auto &e : SELECT->expanded_select_all)
1642 graph_->projections_.emplace_back(*e, ThreadSafePooledOptionalString{}); // expansions never contain nested queries;
1643 // directly add to projections
1644 /* Collect nested queries in SELECT clause. */
1645 for (auto &s : SELECT->select) {
1646 if (auto query = cast<const ast::QueryExpr>(s.first)) // found nested query
1647 nested_queries_in_select.emplace_back(*query, s.second.text);
1648 else
1649 graph_->projections_.emplace_back(*s.first, s.second.text); // add to projections
1650 }
1651
1652 /*----- Process WHERE clause. -----*/
1653 if (stmt.where) {
1654 /*----- Get WHERE clause and convert to CNF. -----*/
1655 auto &WHERE = as<ast::WhereClause>(*stmt.where);
1656 auto cnf_where = cnf::to_CNF(*WHERE.where);
1657
1658 /*----- Analyze all CNF clauses of the WHERE clause. -----*/
1659 for (auto &clause : cnf_where)
1660 process_selection(clause);
1661
1662 /*----- Introduce additional grouping keys. -----*/
1663 for (auto e : additional_grouping_keys_) {
1664 graph_->group_by_.emplace_back(e, ThreadSafePooledOptionalString{});
1665 graph_->projections_.emplace_back(e, ThreadSafePooledOptionalString{});
1666 }
1667
1668 for (auto &[clause, CI] : bound_clauses_) {
1669 if (CI.is_constant()) {
1670 for (auto &[_, src] : named_sources_)
1671 src.get().update_filter(cnf::CNF{std::move(clause)}); // NOTE: kind of silly, but it works
1672 } else if (CI.is_selection()) {
1673 auto it = named_sources_.find(*CI.data_sources.begin());
1674 M_insist(it != named_sources_.end(), "data source with that name was not found");
1675 it->second.get().update_filter(cnf::CNF{std::move(clause)});
1676 } else {
1677 Join::sources_t sources;
1678 for (auto src_name : CI.data_sources) {
1679 auto it = named_sources_.find(src_name);
1680 M_insist(it != named_sources_.end(), "data source with that name was not found");
1681 sources.emplace_back(it->second);
1682 }
1683 auto J = std::make_unique<Join>(cnf::CNF{std::move(clause)}, std::move(sources));
1684 auto &ref = *graph_->joins_.emplace_back(std::move(J)); // add join to query graph
1685 for (auto ds : ref.sources())
1686 ds.get().add_join(ref); // add join to all data sources
1687 }
1688 }
1689 }
1690
1691#if 0
1692 /* Dissect CNF into joins, filters and nested queries. Performs unnesting immediately during graph
1693 * construction. Decorrelation is performed afterwards where possible. */
1694 /* TODO iterate over clauses in the order joins → filters/nested queries (equi) → nested queries (non-equi) */
1695 for (auto &clause : cnf_where) {
1696 const auto nested_queries = get_nested_queries(clause); // collect all nested queries of `clause`
1697 const auto tables = get_tables(clause);
1698 if (nested_queries.empty()) {
1699 /* Compute correlation information of this clause analogously. */
1700 if (tables.empty()) {
1701 /* This clause is a filter and constant. It applies to all data sources. Therefore, add it as a
1702 * filter condition to *each* source. */
1703 for (auto [_, source] : named_sources_)
1704 ; // TODO XXX
1705 source->update_filter(graph_->correlation_info_->compute(cnf::CNF{clause}));
1706 } else if (tables.size() == 1) {
1707 /* This clause is a filter condition on its single data source. */
1708 auto t = *begin(tables);
1709 auto ds = named_sources_.at(t);
1710 // TODO compute correlation information of `clause`
1711 // XXX
1712 // ds->update_filter(graph_->correlation_info_->compute(cnf::CNF({clause}))); //XXX
1713 } else {
1714 /* This clause is a join condition between two or more data sources. Create a new join in the query
1715 * graph. */
1716 Join::sources_t sources;
1717 for (auto t : tables)
1718 sources.emplace_back(named_sources_.at(t));
1719 // XXX
1720 // auto J = graph_->joins_.emplace_back(new Join(graph_->correlation_info_->compute(cnf::CNF({clause})), sources));
1721 // for (auto ds : J->sources())
1722 // ds->add_join(J);
1723 }
1724 } else {
1725 M_insist(nested_queries.size() == 1, "Multiple nested queries in one clause are not supported.");
1726 auto &nested_query = nested_queries.front().get();
1727 auto &nested_stmt = as<const ast::SelectStmt>(*nested_query.query); // get single, nested query
1728 auto &nested_select_clause = as<ast::SelectClause>(*nested_stmt.select);
1729 M_insist(not nested_select_clause.select_all, "* can not yet be used in nested queries.");
1730 M_insist(nested_select_clause.select.size() == 1, "only a single projection allowed");
1731
1732 /* Replace the alias of the single projection by `$res`. */
1733 auto &single_projection = nested_select_clause.select.front();
1734 single_projection.second.text = C.pool("$res");
1735
1736 /* Create a graph for the nested query. Unnest the query by adding it as a source to the outer graph */
1737 auto nested_query_name = nested_query.alias();
1738 auto &Q = graph_->add_source(nested_query_name, QueryGraph::Build(nested_stmt));
1739
1740 /* Create a dependent join between the nested query and all tables in the clause. The dependent join
1741 * will later be resolved through decorrelation. */
1742 if (tables.empty()) {
1743 /* This clause is predicate with nested query `Q` and *no* tables of outer. It behaves like a
1744 * regular selection on outer. Since it contains the nested query `Q`, we have to use a join to an
1745 * arbitrary data source. TODO be clever when selecting the relation to join to. */
1746 M_insist(not named_sources_.empty(), "cannot join nested query to any source");
1747 /* TODO if Q is correlated use the referenced table */
1748 auto &any_data_source = *named_sources_.begin(); // pick *any* data source
1749 auto &J = graph_->joins_.emplace_back(new Join(cnf::CNF({clause}), {Q, any_data_source.second}));
1750 Q.add_join(*J); // add join to Q
1751 any_data_source.second.get().add_join(*J); // add join to the chosen data source of outer
1752 } else {
1753 /* This clause is a join condition. */
1754 Join::sources_t sources{Q};
1755 /* Create a join between outer's tables in `clause` and the nested query (as data source) `Q`. */
1756 for (auto t : tables)
1757 sources.emplace_back(named_sources_.at(t));
1758 auto &J = graph_->joins_.emplace_back(new Join(cnf::CNF({clause}), sources));
1759 for (auto ds : J->sources())
1760 ds.get().add_join(*J);
1761 }
1762
1763 named_sources_.emplace(nested_query_name, Q); // XXX why?
1764
1765 /* Decorrelate the nested query iff it is correlated. */
1766 if (Q.is_correlated())
1767 /* move the correlated predicate (i.e. the one with a free variable) to outer */
1768 decorrelate(Q);
1769 }
1770 }
1771#endif
1772
1773 /* Implement HAVING as a regular selection filter on a sub query. */
1774 cnf::CNF cnf_having;
1775 if (stmt.having) {
1776 auto having_clause = as<ast::HavingClause>(stmt.having.get());
1777 cnf_having = cnf::to_CNF(*having_clause->having);
1778 auto sub_graph = std::exchange(graph_, std::make_unique<QueryGraph>());
1779 auto &sub = graph_->add_source(ThreadSafePooledOptionalString{}, std::move(sub_graph));
1780 sub.update_filter(cnf_having);
1781 named_sources_.emplace(C.pool("HAVING"), sub);
1782 /* Reset former computation of projection and move it after the HAVING. */
1783 std::swap(graph_->projections_, sub.query_graph().projections_);
1784 /* Update `decorrelated_`-flag. */
1785 if (sub.is_correlated())
1786 sub.decorrelated_ = false;
1787 }
1788
1789 /* Dissect CNF into filters and nested queries. Decorrelate if possible. */
1790 /* TODO iterate over clauses in the order filters/nested queries (equi) < nested queries (non-equi) */
1791 for (auto &clause : cnf_having) {
1792 ClauseInfo CI(clause);
1793 auto &nested_queries = CI.nested_queries;
1794 if (nested_queries.empty()) {
1795 // XXX
1796 // named_sources_.at("HAVING")->update_filter(graph_->correlation_info_->compute(cnf::CNF({clause})));
1797 } else {
1798 M_insist(nested_queries.size() == 1, "Multiple nested queries in one clause are not yet supported.");
1799 auto &nested_query = nested_queries.front().expr;
1800 auto &nested_stmt = as<const ast::SelectStmt>(*nested_query.query);
1801 auto &nested_select_clause = as<ast::SelectClause>(*nested_stmt.select);
1802 M_insist(not nested_select_clause.select_all, "* can not yet be used in nested queries.");
1803 M_insist(nested_select_clause.select.size() == 1);
1804
1805 /* Replace the alias of the expression by `$res`. */
1806 nested_select_clause.select.front().second.text = C.pool("$res");
1807
1808 /* Create a graph for the nested query. Unnest the query by adding it as a source to the outer graph */
1809 auto &q_name = nested_query.alias();
1810 auto &Q = graph_->add_source(q_name, QueryGraph::Build(nested_stmt));
1811 named_sources_.emplace(q_name, Q);
1812
1813 /* Create a join between the nested query and the HAVING. */
1814 auto &J = graph_->joins_.emplace_back(new Join(cnf::CNF({clause}), {Q, named_sources_.at(C.pool("HAVING"))}));
1815 for (auto ds : J->sources())
1816 ds.get().add_join(*J);
1817 }
1818 }
1819
1820 /* To implement nested queries in SELECT clause when the query also has grouping, create a subquery (similar to
1821 * HAVING) if not already done. */
1822 if (not nested_queries_in_select.empty() and not stmt.having and
1823 (not graph_->group_by_.empty() or not graph_->aggregates_.empty()))
1824 {
1825 auto sub_graph = std::exchange(graph_, std::make_unique<QueryGraph>());
1826 auto &sub = graph_->add_source(ThreadSafePooledOptionalString{}, std::move(sub_graph)); // anonymous source for nested query
1827 named_sources_.emplace(C.pool("SELECT"), sub);
1828 /* Reset former computation of projection and move it after the SELECT. */
1829 std::swap(graph_->projections_, sub.query_graph().projections_);
1830 /* Update `decorrelated_`-flag. */
1831 if (sub.is_correlated())
1832 sub.decorrelated_ = false;
1833 }
1834
1835 /* Add nested queries in SELECT. */
1836 /* TODO iterate over expressions in the order nested queries (equi) < nested queries (non-equi) */
1837 for (auto [_query, name] : nested_queries_in_select) {
1838 auto &query = _query.get();
1839
1840 auto &select = as<ast::SelectStmt>(*query.query);
1841 auto &select_clause = as<ast::SelectClause>(*select.select);
1842 M_insist(not select_clause.select_all, "* can not yet be used in nested queries.");
1843 M_insist(select_clause.select.size() == 1);
1844 /* Replace the alias of the expression by `$res`. */
1845 select_clause.select.front().second.text = C.pool("$res");
1846 /* Create a sub graph for the nested query. */
1847 auto sub = QueryGraph::Build(select);
1848 auto &q_name = query.alias();
1849 auto &Q = graph_->add_source(q_name, std::move(sub));
1850 /* Create a cartesian product between the nested query and an arbitrary source iff one exists. */
1851 if (not named_sources_.empty()) {
1852 /* TODO if Q is correlated use the referenced table */
1853 DataSource *source;
1854 try {
1855 source = &named_sources_.at(C.pool("SELECT")).get();
1856 } catch (std::out_of_range&) { try {
1857 source = &named_sources_.at(C.pool("HAVING")).get();
1858 } catch (std::out_of_range&) {
1859 source = &named_sources_.begin()->second.get(); // any source
1860 } }
1861 auto &J = graph_->joins_.emplace_back(new Join(cnf::CNF(), {Q, *source}));
1862 Q.add_join(*J);
1863 source->add_join(*J);
1864 }
1865 named_sources_.emplace(q_name, Q);
1866 graph_->projections_.emplace_back(query, name);
1867 }
1868
1869 /* Add order by. */
1870 if (stmt.order_by) {
1871 auto ORDER_BY = as<ast::OrderByClause>(stmt.order_by.get());
1872 for (auto &o : ORDER_BY->order_by)
1873 graph_->order_by_.emplace_back(*o.first, o.second);
1874 }
1875
1876 /* Add limit. */
1877 if (stmt.limit) {
1878 auto LIMIT = as<ast::LimitClause>(stmt.limit.get());
1879 errno = 0;
1880 graph_->limit_.limit = strtoull(*(LIMIT->limit.text), nullptr, 0);
1881 M_insist(errno == 0);
1882 if (LIMIT->offset) {
1883 graph_->limit_.offset = strtoull(*(LIMIT->offset.text), nullptr, 0);
1884 M_insist(errno == 0);
1885 }
1886 }
1887}
1888
1889
1890/*======================================================================================================================
1891 * QueryGraph
1892 *====================================================================================================================*/
1893
1895
1897
1898std::unique_ptr<QueryGraph> QueryGraph::Build(const ast::Stmt &stmt)
1899{
1900 GraphBuilder builder;
1901 builder(stmt);
1902 return builder.get();
1903}
1904
1906 // if (not correlation_info2_->is_correlated()) return true;
1907 // if (not correlation_info_->empty()) return true;
1908 // for (auto &ds : sources_) {
1909 // if (ds->is_correlated()) // recurse into data sources
1910 // return true;
1911 // }
1912 return false;
1913}
1914
1916{
1917 adjacency_matrix_ = std::make_unique<AdjacencyMatrix>(num_sources());
1918
1919 /* Iterate over all joins in the query graph. */
1920 for (auto &join : joins()) {
1921 if (join->sources().size() != 2)
1922 throw std::invalid_argument("building adjacency matrix for non-binary join");
1923 /* Take both join inputs and set the appropriate bit in the adjacency matrix. */
1924 auto i = join->sources()[0].get().id(); // first join input
1925 auto j = join->sources()[1].get().id(); // second join input
1926 adjacency_matrix_->at(i, j) = adjacency_matrix_->at(j, i) = true;
1927 }
1928}
1929
1930void QueryGraph::dot(std::ostream &out) const
1931{
1932 out << "graph query_graph\n{\n"
1933 << " forcelabels=true;\n"
1934 << " overlap=false;\n"
1935 << " labeljust=\"l\";\n"
1936 << " graph [compound=true];\n"
1937 << " graph [fontname = \"DejaVu Sans\"];\n"
1938 << " node [fontname = \"DejaVu Sans\"];\n"
1939 << " edge [fontname = \"DejaVu Sans\"];\n";
1940
1941 dot_recursive(out);
1942
1943 out << '}' << std::endl;
1944}
1945
1946void QueryGraph::dot_recursive(std::ostream &out) const
1947{
1948#define q(X) '"' << X << '"' // quote
1949#define id(X) q(std::hex << &X << std::dec) // convert virtual address to identifier
1950 for (auto &ds : sources()) {
1951 if (auto q = cast<Query>(ds.get()))
1952 q->query_graph().dot_recursive(out);
1953 }
1954
1955 out << "\n subgraph cluster_" << this << " {\n";
1956
1957 for (auto &ds : sources()) {
1958 out << " " << id(*ds) << " [label=<";
1959 out << "<B>" << ds->name() << "</B>";
1960 if (ds->filter().size())
1961 out << "<BR/><FONT COLOR=\"0.0 0.0 0.25\" POINT-SIZE=\"10\">"
1962 << html_escape(to_string(ds->filter()))
1963 << "</FONT>";
1964 out << ">,style=filled,fillcolor=\"0.0 0.0 0.8\"];\n";
1965 if (auto q = cast<Query>(ds.get()))
1966 out << " " << id(*ds) << " -- \"cluster_" << &q->query_graph() << "\";\n";
1967 }
1968
1969 for (auto &j : joins()) {
1970 out << " " << id(*j) << " [label=<" << html_escape(to_string(j->condition())) << ">,style=filled,fillcolor=\"0.0 0.0 0.95\"];\n";
1971 for (auto ds : j->sources())
1972 out << " " << id(*j) << " -- " << id(ds.get()) << ";\n";
1973 }
1974
1975 out << " label=<"
1976 << "<TABLE BORDER=\"0\" CELLPADDING=\"0\" CELLSPACING=\"0\">\n";
1977
1978 /* Limit */
1979 if (limit_.limit != 0 or limit_.offset != 0) {
1980 out << " <TR><TD ALIGN=\"LEFT\">\n"
1981 << " <B>λ</B><FONT POINT-SIZE=\"9\">"
1982 << limit_.limit << ", " << limit_.offset
1983 << "</FONT>\n"
1984 << " </TD></TR>\n";
1985 }
1986
1987 /* Order by */
1988 if (order_by_.size()) {
1989 out << " <TR><TD ALIGN=\"LEFT\">\n"
1990 << " <B>ω</B><FONT POINT-SIZE=\"9\">";
1991 for (auto it = order_by_.begin(), end = order_by_.end(); it != end; ++it) {
1992 if (it != order_by_.begin()) out << ", ";
1993 out << html_escape(to_string(it->first.get()));
1994 out << ' ' << (it->second ? "ASC" : "DESC");
1995 }
1996 out << "</FONT>\n"
1997 << " </TD></TR>\n";
1998 }
1999
2000 /* Projections */
2001 if (not projections_.empty()) {
2002 out << " <TR><TD ALIGN=\"LEFT\">\n"
2003 << " <B>π</B><FONT POINT-SIZE=\"9\">";
2004 for (auto it = projections_.begin(), end = projections_.end(); it != end; ++it) {
2005 if (it != projections_.begin())
2006 out << ", ";
2007 out << html_escape(to_string(it->first.get()));
2008 if (it->second.has_value())
2009 out << " AS " << html_escape(*it->second);
2010 }
2011 out << "</FONT>\n"
2012 << " </TD></TR>\n";
2013 }
2014
2015 /* Group by and aggregates */
2016 if (not group_by_.empty() or not aggregates_.empty()) {
2017 out << " <TR><TD ALIGN=\"LEFT\">\n"
2018 << " <B>γ</B><FONT POINT-SIZE=\"9\">";
2019 for (auto it = group_by_.begin(), end = group_by_.end(); it != end; ++it) {
2020 if (it != group_by_.begin()) out << ", ";
2021 out << html_escape(to_string(it->first.get()));
2022 if (it->second.has_value())
2023 out << " AS " << html_escape(*it->second);
2024 }
2025 if (not group_by_.empty() and not aggregates_.empty())
2026 out << ", ";
2027 for (auto it = aggregates_.begin(), end = aggregates_.end(); it != end; ++it) {
2028 if (it != aggregates_.begin()) out << ", ";
2029 out << html_escape(to_string(it->get()));
2030 }
2031 out << "</FONT>\n"
2032 << " </TD></TR>\n";
2033 }
2034
2035 out << " </TABLE>\n"
2036 << " >;\n"
2037 << " }\n";
2038#undef id
2039#undef q
2040}
2041
2042void QueryGraph::sql(std::ostream &out) const
2043{
2044 QueryGraph2SQL t(out);
2045 t(*this);
2046 out << ';' << std::endl;
2047}
2048
2050void QueryGraph::dump(std::ostream &out) const
2051{
2052 out << "QueryGraph {\n sources:";
2053
2054 /*----- Print sources. -------------------------------------------------------------------------------------------*/
2055 for (auto &src : sources()) {
2056 out << "\n ";
2057 if (auto q = cast<Query>(src.get())) {
2058 out << "(...)";
2059 } else {
2060 auto bt = as<BaseTable>(*src);
2061 out << bt.table().name();
2062 }
2063 if (src->alias().has_value())
2064 out << " AS " << src->alias();
2065 if (not src->filter().empty())
2066 out << " WHERE " << src->filter();
2067 }
2068
2069 /*----- Print joins. ---------------------------------------------------------------------------------------------*/
2070 if (joins().empty()) {
2071 out << "\n no joins";
2072 } else {
2073 out << "\n joins:";
2074 for (auto &j : joins()) {
2075 out << "\n {";
2076 auto &srcs = j->sources();
2077 for (auto it = srcs.begin(), end = srcs.end(); it != end; ++it) {
2078 if (it != srcs.begin()) out << ' ';
2079 if (it->get().alias().has_value())
2080 out << it->get().alias();
2081 else if (auto bt = cast<const BaseTable>(&it->get()))
2082 out << bt->name();
2083 else
2084 out << "(...)";
2085 }
2086 out << "} " << j->condition();
2087 }
2088 }
2089
2090 /*----- Print grouping and aggregation information. -------------------------------------------------------------*/
2091 if (group_by().empty() and aggregates().empty()) {
2092 out << "\n no grouping";
2093 } else {
2094 if (not group_by().empty()) {
2095 out << "\n group by: ";
2096 for (auto it = group_by().begin(), end = group_by().end(); it != end; ++it) {
2097 if (it != group_by().begin())
2098 out << ", ";
2099 out << it->first.get();
2100 if (it->second.has_value())
2101 out << " AS " << it->second;
2102 }
2103 }
2104 if (not aggregates().empty()) {
2105 out << "\n aggregates: ";
2106 for (auto it = aggregates().begin(), end = aggregates().end(); it != end; ++it) {
2107 if (it != aggregates().begin())
2108 out << ", ";
2109 out << it->get();
2110 }
2111 }
2112 }
2113
2114 /*----- Print ordering information. ------------------------------------------------------------------------------*/
2115 if (order_by().empty()) {
2116 out << "\n no order";
2117 } else {
2118 out << "\n order by: ";
2119 for (auto it = order_by().begin(), end = order_by().end(); it != end; ++it) {
2120 if (it != order_by().begin())
2121 out << ", ";
2122 out << it->first.get() << ' ' << (it->second ? "ASC" : "DESC");
2123 }
2124 }
2125
2126 /*----- Print projections. ---------------------------------------------------------------------------------------*/
2127 out << "\n projections: ";
2128 for (auto [expr, alias] : projections()) {
2129 if (alias.has_value()) {
2130 out << "\n AS " << alias;
2131 ast::ASTDumper P(out, 3);
2132 P(expr.get());
2133 } else {
2134 ast::ASTDumper P(out, 2);
2135 P(expr.get());
2136 }
2137 }
2138
2139 out << "\n}" << std::endl;
2140}
2141void QueryGraph::dump() const { dump(std::cerr); }
#define id(X)
#define q(X)
void emplace_back(std::vector< const ast::Expr * > &exprs, const ast::Designator *des)
Like std::vector::emplace_back() but adds only iff des is not already contained in exprs.
Definition: QueryGraph.cpp:580
void insert(std::vector< const ast::Expr * > &exprs, const std::vector< const ast::Designator * > &insertions)
Like std::vector::insert() but adds only those elements of insertions which are not already contained...
Definition: QueryGraph.cpp:610
void emplace_back_unique(std::vector< std::pair< const ast::Expr *, PooledOptionalString > > &pairs, const std::pair< const ast::Designator *, PooledOptionalString > &pair)
Like std::vector::emplace_back() but adds only iff pair is not already contained in pairs.
Definition: QueryGraph.cpp:559
auto get_primary_key(DataSource *source)
Helper structure to compute and provide primary keys.
Definition: QueryGraph.cpp:714
bool is_composable_of(const ast::Expr &expr, const std::vector< std::reference_wrapper< const ast::Expr > > components)
Computes whether the bound parts of expr are composable of elements in components.
Definition: QueryGraph.cpp:383
std::vector< std::reference_wrapper< const ast::FnApplicationExpr > > get_aggregates(const ast::SelectStmt &stmt)
Given a SelectStmt stmt, extract the aggregates to compute while grouping.
Definition: QueryGraph.cpp:348
#define SELECT(XXX, _1, _2, _3, FN,...)
struct @5 args
#define M_unreachable(MSG)
Definition: macro.hpp:146
#define M_insist(...)
Definition: macro.hpp:129
CNF M_EXPORT to_CNF(const ast::Expr &e)
Converts the Boolean Expr e to a CNF.
Definition: CNF.cpp:298
M_EXPORT const version_info & get()
Definition: version.cpp:4
PrimitiveExpr replace(U &&_value)
Definition: WasmDSL.hpp:2969
for(std::size_t idx=1;idx< num_vectors;++idx) res.emplace((vectors_[idx].bitmask()<< uint32_t(idx *vector_type return * res
Definition: WasmDSL.hpp:3696
std::size_t bool
Definition: WasmDSL.hpp:528
std::pair<::wasm::Expression *, std::list< std::shared_ptr< Bit > > > move()
Moves the underlying Binaryen ::wasm::Expression and the referenced bits out of this.
Definition: WasmDSL.hpp:1567
‍mutable namespace
Definition: Backend.hpp:10
bool M_EXPORT equal(const T &first, const U &second)
Checks whether first and second are equal considering permutations.
Definition: fn.hpp:391
bool streq(const char *first, const char *second)
Definition: fn.hpp:29
bool M_EXPORT contains(const H &haystack, const N &needle)
Checks whether haystack contains needle.
Definition: fn.hpp:383
std::string M_EXPORT html_escape(std::string str)
Escapes special characters in a string to be printable in HTML documents.
Definition: fn.cpp:60
std::string to_string(const TokenType tt)
Definition: TokenType.hpp:58
and
Definition: enum_ops.hpp:12
auto visit(Callable &&callable, Base &obj, m::tag< Callable > &&=m::tag< Callable >())
Generic implementation to visit a class hierarchy, with similar syntax as std::visit.
Definition: Visitor.hpp:138
STL namespace.
Helper structure to extract the QueryExprs in an expression.
Definition: QueryGraph.cpp:628
void operator()(Const< ast::Constant > &) override
Definition: QueryGraph.cpp:642
void operator()(Const< ast::Designator > &) override
Definition: QueryGraph.cpp:640
void operator()(Const< ast::QueryExpr > &e) override
Definition: QueryGraph.cpp:650
void operator()(Const< ast::ErrorExpr > &) override
Definition: QueryGraph.cpp:638
void operator()(Const< ast::UnaryExpr > &e) override
Definition: QueryGraph.cpp:646
void operator()(Const< ast::FnApplicationExpr > &) override
Definition: QueryGraph.cpp:644
std::vector< const ast::QueryExpr * > get()
Definition: QueryGraph.cpp:635
std::vector< const ast::QueryExpr * > queries_
Definition: QueryGraph.cpp:630
void operator()(Const< ast::BinaryExpr > &e) override
Definition: QueryGraph.cpp:648
The catalog contains all Databases and keeps track of all meta information of the database system.
Definition: Catalog.hpp:215
ThreadSafePooledString pool(const char *str) const
Creates an internalized copy of the string str by adding it to the internal StringPool.
Definition: Catalog.hpp:274
static Catalog & Get()
Return a reference to the single Catalog instance.
Collects information of a cnf::Clause.
Definition: QueryGraph.hpp:32
ClauseInfo(const cnf::Clause &clause)
Collects correlation information of all cnf::Clauses occurring in the QueryGraph.
Definition: QueryGraph.cpp:418
std::unordered_set< ThreadSafePooledString > data_sources
Definition: QueryGraph.hpp:35
std::vector< SubqueryInfo > nested_queries
Definition: QueryGraph.hpp:36
unsigned binding_depth
Definition: QueryGraph.hpp:33
A DataSource in a QueryGraph.
Definition: QueryGraph.hpp:33
std::size_t id_
unique identifier of this data source within its query graph
Definition: QueryGraph.hpp:42
virtual ThreadSafePooledOptionalString name() const =0
Returns the name of this DataSource.
const ThreadSafePooledOptionalString & alias() const
Returns the alias of this DataSource.
Definition: QueryGraph.hpp:58
const auto & joins() const
Returns a reference to the Joins using this DataSource.
Definition: QueryGraph.hpp:69
void update_filter(cnf::CNF filter)
Adds filter to the current filter of this DataSource by logical conjunction.
Definition: QueryGraph.hpp:65
void add_join(Join &join)
Adds join to the set of Joins of this DataSource.
Definition: QueryGraph.hpp:67
bool decorrelated_
indicates whether this source is already decorrelated
Definition: QueryGraph.hpp:44
std::unique_ptr< QueryGraph > graph_
‍the query graph that is being constructed
Definition: QueryGraph.hpp:62
std::unordered_map< ThreadSafePooledString, std::reference_wrapper< DataSource > > named_sources_
‍maps DataSource names/aliases to the DataSource instance
Definition: QueryGraph.hpp:64
clause_map deferred_clauses_
‍to be handled by an outer query
Definition: QueryGraph.hpp:59
std::vector< std::reference_wrapper< const ast::Expr > > existing_grouping_keys_
‍the grouping keys of the statement
Definition: QueryGraph.hpp:52
void operator()(Const< ast::Stmt > &s)
Definition: QueryGraph.hpp:76
std::unordered_set< std::reference_wrapper< const ast::Expr > > additional_grouping_keys_
‍additionally required grouping keys to perform decorrelation
Definition: QueryGraph.hpp:54
clause_map bound_clauses_
‍to be handled by the current query
Definition: QueryGraph.hpp:57
bool needs_grouping_
‍whether this graph needs grouping; either by explicily grouping or implicitly by using aggregations
Definition: QueryGraph.hpp:66
std::unique_ptr< QueryGraph > get()
‍returns the constructed QueryGraph
Definition: QueryGraph.hpp:72
void process_selection(cnf::Clause &clause)
Computes correlation information of clause.
Definition: QueryGraph.cpp:440
A Join in a QueryGraph combines DataSources by a join condition.
Definition: QueryGraph.hpp:138
std::vector< std::reference_wrapper< DataSource > > sources_t
Definition: QueryGraph.hpp:139
A data type representing a pooled (or internalized) object.
Definition: Pool.hpp:168
Translates a query graph in SQL.
The query graph represents all data sources and joins in a graph structure.
Definition: QueryGraph.hpp:172
std::vector< order_type > order_by_
the order
Definition: QueryGraph.hpp:189
struct m::QueryGraph::@0 limit_
limit: limit and offset
std::vector< std::unique_ptr< Join > > joins_
collection of all joins in this query graph
Definition: QueryGraph.hpp:184
const auto & group_by() const
Definition: QueryGraph.hpp:259
void add_source(std::unique_ptr< DataSource > source)
Definition: QueryGraph.hpp:229
bool is_correlated() const
Returns true iff the graph is correlated, i.e.
const std::vector< projection_type > & projections() const
Definition: QueryGraph.hpp:261
bool grouping() const
Returns true iff the graph contains a grouping.
Definition: QueryGraph.hpp:273
const auto & joins() const
Definition: QueryGraph.hpp:258
std::size_t num_sources() const
Returns the number of DataSources in this graph.
Definition: QueryGraph.hpp:224
void dot(std::ostream &out) const
Translates the query graph to dot.
void compute_adjacency_matrix() const
const auto & aggregates() const
Definition: QueryGraph.hpp:260
void dump() const
void sql(std::ostream &out) const
Translates the query graph to SQL.
void remove_join(Join &join)
Definition: QueryGraph.hpp:326
const auto & order_by() const
Definition: QueryGraph.hpp:262
std::vector< std::reference_wrapper< const ast::FnApplicationExpr > > aggregates_
the aggregates to compute
Definition: QueryGraph.hpp:187
const auto & sources() const
Definition: QueryGraph.hpp:257
std::vector< projection_type > projections_
the data to compute
Definition: QueryGraph.hpp:188
void dot_recursive(std::ostream &out) const
std::unique_ptr< AdjacencyMatrix > adjacency_matrix_
Definition: QueryGraph.hpp:192
std::vector< group_type > group_by_
the grouping keys
Definition: QueryGraph.hpp:186
std::vector< std::unique_ptr< DataSource > > sources_
collection of all data sources in this query graph
Definition: QueryGraph.hpp:183
uint64_t limit
Definition: QueryGraph.hpp:190
uint64_t offset
Definition: QueryGraph.hpp:190
static std::unique_ptr< QueryGraph > Build(const ast::Stmt &stmt)
A Query in a QueryGraph is a DataSource that represents a nested query.
Definition: QueryGraph.hpp:116
bool is_correlated() const override
Returns true iff the data source is correlated.
Definition: QueryGraph.cpp:21
std::unique_ptr< QueryGraph > query_graph_
query graph of the sub-query
Definition: QueryGraph.hpp:120
QueryGraph & query_graph() const
Returns a reference to the internal QueryGraph.
Definition: QueryGraph.hpp:129
Dumps a textual representation of the AST to an output stream.
Definition: ASTDumper.hpp:12
A binary expression.
Definition: AST.hpp:348
A designator.
Definition: AST.hpp:134
ThreadSafePooledString get_table_name() const
Definition: AST.hpp:197
Token attr_name
Definition: AST.hpp:139
bool has_explicit_table_name() const
Definition: AST.hpp:192
bool has_table_name() const
Definition: AST.hpp:196
const target_type & target() const
Definition: AST.hpp:203
An expression.
Definition: AST.hpp:39
const Type * type() const
Returns the Type of this Expr.
Definition: AST.hpp:58
A function application.
Definition: AST.hpp:246
A query expression for nested queries.
Definition: AST.hpp:389
A SQL select statement.
Definition: AST.hpp:936
std::unique_ptr< Clause > from
Definition: AST.hpp:938
std::unique_ptr< Clause > select
Definition: AST.hpp:937
std::unique_ptr< Clause > where
Definition: AST.hpp:939
std::unique_ptr< Clause > order_by
Definition: AST.hpp:942
std::unique_ptr< Clause > having
Definition: AST.hpp:941
std::unique_ptr< Clause > group_by
Definition: AST.hpp:940
std::unique_ptr< Clause > limit
Definition: AST.hpp:943
A SQL statement.
Definition: AST.hpp:794
A unary expression: "+e", "-e", "~e", "NOT e".
Definition: AST.hpp:324
A CNF represents a conjunction of cnf::Clauses.
Definition: CNF.hpp:134
A cnf::Clause represents a disjunction of Predicates.
Definition: CNF.hpp:87
static Predicate Create(const ast::Expr *e, bool is_negative)
Creates a Predicate from e.
Definition: CNF.hpp:28
Signals that an argument to a function of method was invalid.
Definition: exception.hpp:37
Definition: tag.hpp:8
Exception class which can be thrown to skip recursion of the subtree in pre-order visitors.
Definition: Visitor.hpp:18