mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
PlanEnumerator.cpp
Go to the documentation of this file.
2
3#include <algorithm>
4#include <cstring>
5#include <execution>
6#include <functional>
7#include <iostream>
8#include <iterator>
9#include <memory>
12#include <mutable/util/ADT.hpp>
13#include <mutable/util/fn.hpp>
16#include <queue>
17#include <set>
18#include <type_traits>
19#ifdef __BMI2__
20#include <x86intrin.h>
21#endif
22
23
24using namespace m;
25using namespace m::pe;
26
27
28/*======================================================================================================================
29 * PEall
30 *====================================================================================================================*/
31
33struct PEall final : PlanEnumeratorCRTP<PEall>
34{
36 using base_type::operator();
37
38 template<typename PlanTable>
39 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const {
40 auto &sources = G.sources();
41 const std::size_t n = sources.size();
43
44 for (std::size_t i = 1, end = 1UL << n; i < end; ++i) {
45 Subproblem S(i);
46 if (S.size() == 1) continue; // skip
47 /* Compute break condition to avoid enumerating symmetric subproblems. */
48 uint64_t offset = S.capacity() - __builtin_clzl(uint64_t(S));
49 M_insist(offset != 0, "invalid subproblem offset");
50 Subproblem limit = Subproblem::Singleton(offset - 1);
51 for (Subproblem S1(least_subset(S)); S1 != limit; S1 = Subproblem(next_subset(S1, S))) {
52 Subproblem S2 = S - S1; // = S \ S1;
53 M_insist(PT.has_plan(S1), "must have found the optimal plan for S1");
54 M_insist(PT.has_plan(S2), "must have found the optimal plan for S2");
55 /* Exploit commutativity of join. */
56 cnf::CNF condition; // TODO use join condition
57 PT.update(G, CE, CF, S1, S2, condition);
58 }
59 }
60 }
61};
62
63
64/*======================================================================================================================
65 * DPsize
66 *====================================================================================================================*/
67
69struct DPsize final : PlanEnumeratorCRTP<DPsize>
70{
72 using base_type::operator();
73
74 template<typename PlanTable>
75 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const {
76 auto &sources = G.sources();
77 std::size_t n = sources.size();
78 const AdjacencyMatrix &M = G.adjacency_matrix();
80
81 /* Process all subplans of size greater than one. */
82 for (std::size_t s = 2; s <= n; ++s) {
83 for (std::size_t s1 = 1; s1 < s; ++s1) {
84 std::size_t s2 = s - s1;
85 /* Check for all combinations of subsets if they are valid joins and if so, forward the combination to the
86 * plan table. */
87 for (auto S1 = GospersHack::enumerate_all(s1, n); S1; ++S1) { // enumerate all subsets of size `s1`
88 if (not PT.has_plan(*S1)) continue; // subproblem S1 not connected -> skip
89 for (auto S2 = GospersHack::enumerate_all(s2, n); S2; ++S2) { // enumerate all subsets of size `s - s1`
90 if (not PT.has_plan(*S2)) continue; // subproblem S2 not connected -> skip
91 if (*S1 & *S2) continue; // subproblems not disjoint -> skip
92 if (not M.is_connected(*S1, *S2)) continue; // subproblems not connected -> skip
93 cnf::CNF condition; // TODO use join condition
94 PT.update(G, CE, CF, *S1, *S2, condition);
95 }
96 }
97 }
98 }
99 }
100};
101
102
103/*======================================================================================================================
104 * DPsizeOpt
105 *====================================================================================================================*/
106
110struct DPsizeOpt final : PlanEnumeratorCRTP<DPsizeOpt>
111{
113 using base_type::operator();
114
115 template<typename PlanTable>
116 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const {
117 auto &sources = G.sources();
118 std::size_t n = sources.size();
119 const AdjacencyMatrix &M = G.adjacency_matrix();
121
122 /* Process all subplans of size greater than one. */
123 for (std::size_t s = 2; s <= n; ++s) {
124 std::size_t m = s / 2; // division with rounding down
125 for (std::size_t s1 = 1; s1 <= m; ++s1) {
126 std::size_t s2 = s - s1;
127 /* Check for all combinations of subsets if they are valid joins and if so, forward the combination to the
128 * plan table. */
129 if (s1 == s2) { // exploit commutativity of join: if A⋈ B is possible, so is B⋈ A
130 for (auto S1 = GospersHack::enumerate_all(s1, n); S1; ++S1) { // enumerate all subsets of size `s1`
131 if (not PT.has_plan(*S1)) continue; // subproblem not connected -> skip
133 for (++S2; S2; ++S2) { // enumerate only the subsets following S1
134 if (not PT.has_plan(*S2)) continue; // subproblem not connected -> skip
135 if (*S1 & *S2) continue; // subproblems not disjoint -> skip
136 if (not M.is_connected(*S1, *S2)) continue; // subproblems not connected -> skip
137 /* Exploit commutativity of join. */
138 cnf::CNF condition; // TODO use join condition
139 PT.update(G, CE, CF, *S1, *S2, condition);
140 }
141 }
142 } else {
143 for (auto S1 = GospersHack::enumerate_all(s1, n); S1; ++S1) { // enumerate all subsets of size `s1`
144 if (not PT.has_plan(*S1)) continue; // subproblem not connected -> skip
145 for (auto S2 = GospersHack::enumerate_all(s2, n); S2; ++S2) { // enumerate all subsets of size `s2`
146 if (not PT.has_plan(*S2)) continue; // subproblem not connected -> skip
147 if (*S1 & *S2) continue; // subproblems not disjoint -> skip
148 if (not M.is_connected(*S1, *S2)) continue; // subproblems not connected -> skip
149 /* Exploit commutativity of join. */
150 cnf::CNF condition; // TODO use join condition
151 PT.update(G, CE, CF, *S1, *S2, condition);
152 }
153 }
154 }
155 }
156 }
157 }
158};
159
160
161/*======================================================================================================================
162 * DPsizeSub
163 *====================================================================================================================*/
164
166struct DPsizeSub final : PlanEnumeratorCRTP<DPsizeSub>
167{
169 using base_type::operator();
170
171 template<typename PlanTable>
172 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const {
173 auto &sources = G.sources();
174 std::size_t n = sources.size();
175 const AdjacencyMatrix &M = G.adjacency_matrix();
177
178 /* Process all subplans of size greater than one. */
179 for (std::size_t s = 2; s <= n; ++s) {
180 for (auto S = GospersHack::enumerate_all(s, n); S; ++S) { // enumerate all subsets of size `s`
181 if (not M.is_connected(*S)) continue; // not connected -> skip
182 for (Subproblem O(least_subset(*S)); O != *S; O = Subproblem(next_subset(O, *S))) {
183 Subproblem Comp = *S - O;
184 M_insist(M.is_connected(O, Comp), "implied by S inducing a connected subgraph");
185 if (not PT.has_plan(O)) continue; // not connected -> skip
186 if (not PT.has_plan(Comp)) continue; // not connected -> skip
187 cnf::CNF condition; // TODO use join condition
188 PT.update(G, CE, CF, O, Comp, condition);
189 }
190 }
191 }
192 }
193};
194
195
196/*======================================================================================================================
197 * DPsub
198 *====================================================================================================================*/
199
201struct DPsub final : PlanEnumeratorCRTP<DPsub>
202{
204 using base_type::operator();
205
206 template<typename PlanTable>
207 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const {
208 auto &sources = G.sources();
209 const std::size_t n = sources.size();
210 const AdjacencyMatrix &M = G.adjacency_matrix();
212
213 for (std::size_t i = 1, end = 1UL << n; i < end; ++i) {
214 Subproblem S(i);
215 if (S.size() == 1) continue; // no non-empty and strict subset of S -> skip
216 if (not M.is_connected(S)) continue; // not connected -> skip
217 for (Subproblem S1(least_subset(S)); S1 != S; S1 = Subproblem(next_subset(S1, S))) {
218 Subproblem S2 = S - S1;
219 M_insist(M.is_connected(S1, S2), "implied by S inducing a connected subgraph");
220 if (not PT.has_plan(S1)) continue; // not connected -> skip
221 if (not PT.has_plan(S2)) continue; // not connected -> skip
222 cnf::CNF condition; // TODO use join condition
223 PT.update(G, CE, CF, S1, S2, condition);
224 }
225 }
226 }
227};
228
229
230/*======================================================================================================================
231 * DPsubOpt
232 *====================================================================================================================*/
233
236struct DPsubOpt final : PlanEnumeratorCRTP<DPsubOpt>
237{
239 using base_type::operator();
240
241 template<typename PlanTable>
242 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const {
243 auto &sources = G.sources();
244 const std::size_t n = sources.size();
245 const AdjacencyMatrix &M = G.adjacency_matrix();
247
248 for (std::size_t i = 1, end = 1UL << n; i < end; ++i) {
249 Subproblem S(i);
250 if (S.size() == 1) continue; // no non-empty and strict subset of S -> skip
251 if (not M.is_connected(S)) continue;
252 /* Compute break condition to avoid enumerating symmetric subproblems. */
253 uint64_t offset = S.capacity() - __builtin_clzl(uint64_t(S));
254 M_insist(offset != 0, "invalid subproblem offset");
255 Subproblem limit = Subproblem::Singleton(offset - 1);
256 for (Subproblem S1(least_subset(S)); S1 != limit; S1 = Subproblem(next_subset(S1, S))) {
257 Subproblem S2 = S - S1; // = S \ S1;
258 M_insist(M.is_connected(S1, S2), "implied by S inducing a connected subgraph");
259 if (not PT.has_plan(S1)) continue; // not connected -> skip
260 if (not PT.has_plan(S2)) continue; // not connected -> skip
261 /* Exploit commutativity of join. */
262 cnf::CNF condition; // TODO use join condition
263 PT.update(G, CE, CF, S1, S2, condition);
264 }
265 }
266 }
267};
268
269
270/*======================================================================================================================
271 * DPccp
272 *====================================================================================================================*/
273
274template<typename PlanTable>
275void DPccp::operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
276{
277 const AdjacencyMatrix &M = G.adjacency_matrix();
279 const Subproblem All = SmallBitset::All(G.num_sources());
280 cnf::CNF condition; // TODO use join condition
281
282 auto handle_CSG_pair = [&](const Subproblem left, const Subproblem right) {
283 PT.update(G, CE, CF, left, right, condition);
284 };
285
286 M.for_each_CSG_pair_undirected(All, handle_CSG_pair);
287}
288
289
290/*======================================================================================================================
291 * IK/KBZ
292 *====================================================================================================================*/
293
298struct IKKBZ final : PlanEnumeratorCRTP<IKKBZ>
299{
301 using base_type::operator();
302
303 template<typename PlanTable>
304 std::vector<std::size_t>
305 linearize(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF,
306 const CardinalityEstimator &CE) const
307 {
308 /* Computes the selectivity between two relations, identified by indices `u` and `v`, respectively. */
309 auto selectivity = [&](std::size_t u, std::size_t v) {
312 auto &model_u = *PT[U].model;
313 auto &model_v = *PT[V].model;
314 auto &joined = PT[U|V];
315 if (not joined.model) {
316 cnf::CNF condition; // TODO use join condition
317 joined.model = CE.estimate_join(G, model_u, model_v, condition);
318 }
319 const double cardinality_joined = double(CE.predict_cardinality(*joined.model));
320 return cardinality_joined / double(CE.predict_cardinality(model_u)) / double(CE.predict_cardinality(model_v));
321 };
322
323 /* Compute MST w.r.t. selectivity of join edges. See Ravi Krishnamurthy, Haran Boral, and Carlo Zaniolo.
324 * "Optimization of Nonrecursive Queries." */
325 const AdjacencyMatrix MST = M.minimum_spanning_forest(selectivity);
326
327 std::vector<std::size_t> linearization;
328 linearization.reserve(G.num_sources());
329 std::vector<std::size_t> best_linearization;
330 best_linearization.reserve(G.num_sources());
331 double least_cost = std::numeric_limits<decltype(least_cost)>::infinity();
332
333 /* The *rank* of a relation is defined as the factor by which the result set grows when joining with this
334 * relation. The rank is defined for a *child* relation and its *parent* relation w.r.t. the MST.
335 * "Intuitively, the rank measures the increase in the intermediate result per unit differential cost of doing
336 * the join." */
337 auto rank = [&](std::size_t parent_id, std::size_t child_id) -> double {
338 const SmallBitset parent = SmallBitset::Singleton(parent_id);
339 const SmallBitset child = SmallBitset::Singleton(child_id);
340 M_insist(MST.is_connected(parent, child), "relations must be joinable");
341 M_insist(MST.neighbors(parent)[child_id]);
342
343 const auto &model_parent = *PT[parent].model;
344 const auto &model_child = *PT[child].model;
345 if (not PT[parent|child].model) {
346 cnf::CNF condition; // TODO use join condition
347 PT[parent|child].model = CE.estimate_join(G, model_parent, model_child, condition);
348 }
349 const double cardinality_joined = CE.predict_cardinality(*PT[parent|child].model);
350 const double cardinality_parent = CE.predict_cardinality(model_parent);
351 const double cardinality_child = CE.predict_cardinality(model_child);
352 auto g = [](double cardinality) -> double { return 1 * cardinality; }; // TODO adapt to cost function
353 return (cardinality_joined - cardinality_parent) / g(cardinality_child);
354 };
355
356 struct ranked_relation
357 {
358 private:
359 std::size_t id_;
360 double rank_;
361
362 public:
363 ranked_relation(std::size_t id, double rank) : id_(id), rank_(rank) { }
364
365 std::size_t id() const { return id_; }
366 double rank() const { return rank_; }
367
368 bool operator<(const ranked_relation &other) const { return this->rank() < other.rank(); }
369 bool operator>(const ranked_relation &other) const { return this->rank() > other.rank(); }
370 };
371
372 /*---- Consider each relation as root and linearize the query tree. -----*/
373 for (std::size_t root_id = 0; root_id != G.num_sources(); ++root_id) {
374 Subproblem root = Subproblem::Singleton(root_id);
375
376 linearization.clear();
377 linearization.emplace_back(root_id);
378
379 /* Initialize MIN heap with successors of root of query tree. */
380 std::priority_queue<ranked_relation, std::vector<ranked_relation>, std::greater<ranked_relation>> Q;
381 for (std::size_t n : MST.neighbors(root))
382 Q.emplace(n, rank(root_id, n));
383
384 Subproblem joined = root;
385 while (not Q.empty()) {
386 ranked_relation ranked_relation = Q.top();
387 Q.pop();
388
389 const Subproblem R = Subproblem::Singleton(ranked_relation.id());
390 M_insist((joined & R).empty());
391 M_insist(MST.is_connected(joined, R));
392 linearization.emplace_back(ranked_relation.id());
393
394 /*----- Join next relation with already joined relations. -----*/
395 cnf::CNF condition; // TODO use join condition
396 PT.update(G, CE, CF, joined, R, condition);
397 joined |= R; // add R to the joined relations
398
399 /*----- Add all children of `R` to the priority queue. -----*/
400 const Subproblem N = MST.neighbors(R) - joined;
401 for (std::size_t n : N)
402 Q.emplace(n, rank(ranked_relation.id(), n));
403 }
404
405 /* Get the cost of the final plan. */
406 const double cost = PT[joined].cost;
407
408 /*----- Clear plan table. -----*/
409 Subproblem runner = root;
410 M_insist(linearization[0] == root_id);
411 for (std::size_t i = 1; i != linearization.size(); ++i) {
412 const std::size_t R = linearization[i];
413 M_insist(not runner[R]);
414 runner[R] = true;
415 M_insist(not runner.is_singleton());
416 M_insist(bool(PT[runner].model), "must have computed a model during linearization");
417 M_insist(bool(PT[runner].left), "must have a left subplan");
418 M_insist(bool(PT[runner].right), "must have a right subplan");
419 PT[runner] = PlanTableEntry();
420 }
421
422 if (cost < least_cost) {
423 using std::swap;
424 swap(best_linearization, linearization);
425 least_cost = cost;
426 }
427 }
428
429 return best_linearization;
430 }
431
432 template<typename PlanTable>
433 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const {
434 const AdjacencyMatrix &M = G.adjacency_matrix();
436
437 /* Linearize the vertices. */
438 const std::vector<std::size_t> linearization = linearize(PT, G, M, CF, CE);
439 M_insist(linearization.size() == G.num_sources());
440
441 /*----- Reconstruct the right-deep plan from the linearization. -----*/
442 Subproblem right = Subproblem::Singleton(linearization[0]);
443 for (std::size_t i = 1; i < G.num_sources(); ++i) {
444 const Subproblem left = Subproblem::Singleton(linearization[i]);
445 cnf::CNF condition; // TODO use join condition
446 PT.update(G, CE, CF, left, right, condition);
447 right = left | right;
448 }
449 }
450};
451
452
453/*======================================================================================================================
454 * LinearizedDP
455 *====================================================================================================================*/
456
457struct LinearizedDP final : PlanEnumeratorCRTP<LinearizedDP>
458{
460 using base_type::operator();
461
462 struct Sequence
463 {
464 private:
465 const std::vector<std::size_t> &linearization_;
467 std::size_t begin_;
469 std::size_t end_;
472
473 public:
474 Sequence(const std::vector<std::size_t> &linearization, std::size_t begin, std::size_t end)
475 : linearization_(linearization), begin_(begin), end_(end)
476 {
477 S_ = compute_subproblem_from_sequence(begin_, end_);
478 }
479
480 std::size_t length() const { return end_ - begin_; }
481 Subproblem subproblem() const { return S_; }
483 std::size_t first_index() const { return begin_; }
485 std::size_t last_index() const { return end_ - 1; }
486 std::size_t end() const { return end_; }
487
488 std::size_t first() const { return linearization_[first_index()]; }
489 std::size_t last() const { return linearization_[last_index()]; }
490
491 bool is_at_front() const { return begin_ == 0; }
492 bool is_at_back() const { return end_ == linearization_.size(); }
493
494 Subproblem compute_subproblem_from_sequence(std::size_t begin, std::size_t end) {
495 M_insist(begin < end);
496 M_insist(end <= linearization_.size());
497 Subproblem S;
498 while (begin != end)
499 S[linearization_[begin++]] = true;
500 return S;
501 }
502
504 M_insist(begin_ > 0);
505 --begin_;
506 S_[first()] = true;
507 }
508
510 M_insist(end_ < linearization_.size());
511 ++end_;
512 S_[last()] = true;
513 }
514
516 M_insist(begin_ < end_);
517 S_[first()] = false;
518 ++begin_;
519 }
520
522 M_insist(begin_ < end_);
523 S_[last()] = false;
524 --end_;
525 }
526
527 friend std::ostream & operator<<(std::ostream &out, const Sequence &seq) {
528 out << seq.first_index() << '-' << seq.last_index() << ": [";
529 for (std::size_t i = seq.first_index(); i <= seq.last_index(); ++i) {
530 if (i != seq.first_index()) out << ", ";
531 out << seq.linearization_[i];
532 }
533 return out << ']';
534 }
535
536 void dump(std::ostream &out) const { out << *this << std::endl; }
537 void dump() const { dump(std::cerr); }
538 };
539
540 template<typename PlanTable>
541 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const {
542 if (G.num_sources() <= 1) return;
543
545 const AdjacencyMatrix &M = G.adjacency_matrix();
546 const std::vector<std::size_t> linearization = IKKBZ{}.linearize(PT, G, M, CF, CE);
547 const std::size_t num_relations = G.num_sources();
548
549 Sequence seq(linearization, 0, 2);
550 bool moves_right = true;
551 for (;;) {
552 if (M.is_connected(seq.subproblem())){
553 /*----- Consider all dissections of `seq` into two sequences. -----*/
554 for (std::size_t mid = seq.first_index() + 1; mid <= seq.last_index(); ++mid) {
555 const Subproblem left = seq.compute_subproblem_from_sequence(seq.first_index(), mid);
556 const Subproblem right = seq.subproblem() - left;
557
558 const bool is_left_connected = PT.has_plan(left);
559 const bool is_right_connected = PT.has_plan(right);
560 if (is_left_connected and is_right_connected) {
561 cnf::CNF condition; // TODO use join condition
562 PT.update(G, CE, CF, left, right, condition);
563 }
564 }
565 }
566
567 if (seq.length() == num_relations)
568 break;
569
570 /*----- Move and grow the sequence. -----*/
571 if (moves_right) {
572 if (seq.is_at_back()) [[unlikely]] {
573 moves_right = false;
574 seq.extend_at_front();
575 } else {
576 seq.extend_at_back();
577 seq.shrink_at_front();
578 }
579 } else {
580 if (seq.is_at_front()) [[unlikely]] {
581 moves_right = true;
582 seq.extend_at_back();
583 } else {
584 seq.extend_at_front();
585 seq.shrink_at_back();
586 }
587 }
588 }
589 }
590};
591
592
593/*======================================================================================================================
594 * TDbasic
595 *====================================================================================================================*/
596
597struct TDbasic final : PlanEnumeratorCRTP<TDbasic>
598{
600 using base_type::operator();
601
602 template<typename PlanTable>
603 void PlanGen(const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE,
604 PlanTable &PT, Subproblem S) const
605 {
606 if (not PT.has_plan(S)) {
607 /* Naive Partitioning */
608 /* Iterate over all non-empty and strict subsets in `S`. */
609 for (Subproblem sub(least_subset(S)); sub != S; sub = Subproblem(next_subset(sub, S))) {
610 Subproblem complement = S - sub;
611 /* Check for valid connected subgraph complement pair. */
612 if (uint64_t(least_subset(sub)) < uint64_t(least_subset(complement)) and
613 M.is_connected(sub) and M.is_connected(complement))
614 {
615 /* Process `sub` and `complement` recursively. */
616 PlanGen(G, M, CF, CE, PT, sub);
617 PlanGen(G, M, CF, CE, PT, complement);
618
619 /* Update `PlanTable`. */
620 cnf::CNF condition; // TODO use join condition
621 PT.update(G, CE, CF, sub, complement, condition);
622 }
623 }
624 }
625 }
626
627 template<typename PlanTable>
628 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const {
629 auto &sources = G.sources();
630 std::size_t n = sources.size();
631 const AdjacencyMatrix &M = G.adjacency_matrix();
633
634 PlanGen(G, M, CF, CE, PT, Subproblem::All(n));
635 }
636};
637
638
639/*======================================================================================================================
640 * TDMinCutAGaT
641 *====================================================================================================================*/
642
643struct TDMinCutAGaT final : PlanEnumeratorCRTP<TDMinCutAGaT>
644{
646 using base_type::operator();
647
648 template<typename PlanTable>
649 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const {
650 const AdjacencyMatrix &M = G.adjacency_matrix();
652
653 auto handle_ccp = [&](const Subproblem first, const Subproblem second) -> void {
654 auto recurse = [&](const Subproblem first, const Subproblem second, auto recurse) -> void {
655 auto handle_ccp = std::bind(recurse, std::placeholders::_1, std::placeholders::_2, recurse);
656 /*----- Solve recursively. -----*/
657 if (not PT.has_plan(first))
658 MinCutAGaT{}.partition(M, handle_ccp, first);
659 M_insist(PT.has_plan(first));
660 if (not PT.has_plan(second))
661 MinCutAGaT{}.partition(M, handle_ccp, second);
662 M_insist(PT.has_plan(second));
663
664 /*----- Update `PlanTable`. -----*/
665 cnf::CNF condition; // TODO use join condition
666 PT.update(G, CE, CF, first, second, condition);
667 };
668 recurse(first, second, recurse);
669 };
670
671 if (G.num_sources() > 1) {
672 const Subproblem All = Subproblem::All(G.num_sources());
673 MinCutAGaT{}.partition(M, handle_ccp, All);
674 }
675 }
676};
677
678
679/*======================================================================================================================
680 * GOO
681 *====================================================================================================================*/
682
683template<typename PlanTable>
684void GOO::operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
685{
686 const AdjacencyMatrix &M = G.adjacency_matrix();
688
689 /*----- Initialize subproblems and their neighbors. -----*/
690 node nodes[G.num_sources()];
691 for (std::size_t i = 0; i != G.num_sources(); ++i) {
693 Subproblem N = M.neighbors(S);
694 nodes[i] = node(S, N);
695 }
696
697 /*----- Greedyly enumerate joins, thereby computing a plan. -----*/
698 compute_plan(PT, G, M, CF, CE, nodes, nodes + G.num_sources());
699}
700
701
702/*======================================================================================================================
703 * TDGOO
704 *====================================================================================================================*/
705
706template<typename PlanTable>
707void TDGOO::operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
708{
709 const AdjacencyMatrix &M = G.adjacency_matrix();
711
712 const Subproblem All = Subproblem::All(G.num_sources());
713 std::vector<Subproblem> worklist;
714 worklist.reserve(G.num_sources());
715 worklist.emplace_back(All);
716
717 /*----- Compute a plan with top-down GOO. -----*/
718 auto update_PT = [&](Subproblem left, Subproblem right) {
719 PT.update(G, CE, CF, left, right, cnf::CNF{}); // TODO: use actual condition
720 };
721 for_each_join(update_PT, PT, G, M, CF, CE, std::move(worklist));
722}
723
724
725#define LIST_PE(X) \
726 X(DPccp, "enumerates connected subgraph complement pairs") \
727 X(DPsize, "size-based subproblem enumeration") \
728 X(DPsizeOpt, "optimized DPsize: does not enumerate symmetric subproblems") \
729 X(DPsizeSub, "DPsize with enumeration of subset complement pairs") \
730 X(DPsub, "subset-based subproblem enumeration") \
731 X(DPsubOpt, "optimized DPsub: does not enumerate symmetric subproblems") \
732 X(GOO, "Greedy Operator Ordering") \
733 X(TDGOO, "Top-down variant of Greedy Operator Ordering") \
734 X(IKKBZ, "greedy algorithm by IK/KBZ, ordering joins by rank") \
735 X(LinearizedDP, "DP with search space linearization based on IK/KBZ") \
736 X(TDbasic, "basic top-down join enumeration using generate-and-test partitioning") \
737 X(TDMinCutAGaT, "top-down join enumeration using minimal graph cuts and advanced generate-and-test partitioning") \
738 X(PEall, "enumerates ALL join orders, inclding Cartesian products")
739
740#define INSTANTIATE(NAME, _) \
741 template void NAME::operator()(enumerate_tag, PlanTableSmallOrDense &PT, const QueryGraph &G, const CostFunction &CF) const; \
742 template void NAME::operator()(enumerate_tag, PlanTableLargeAndSparse &PT, const QueryGraph &G, const CostFunction &CF) const;
744#undef INSTANTIATE
745
746__attribute__((constructor(202)))
747static void register_plan_enumerators()
748{
749 Catalog &C = Catalog::Get();
750#define REGISTER(NAME, DESCRIPTION) \
751 C.register_plan_enumerator(C.pool(#NAME), std::make_unique<NAME>(), DESCRIPTION);
753#undef REGISTER
754}
#define REGISTER(CLASS)
__attribute__((constructor(202))) static void register_interpreter()
#define id(X)
#define LIST_PE(X)
#define INSTANTIATE(CLASS)
#define M_insist(...)
Definition: macro.hpp:129
‍plan enumerator namespace
‍mutable namespace
Definition: Backend.hpp:10
SmallBitset Subproblem
void swap(PlanTableBase< Actual > &first, PlanTableBase< Actual > &second)
Definition: PlanTable.hpp:394
SmallBitset least_subset(SmallBitset S)
Returns the least subset of a given set, i.e. the set represented by the lowest 1 bit.
Definition: ADT.hpp:267
and
Definition: enum_ops.hpp:12
and arithmetic< U > and same_signedness< T, U > U
Definition: concepts.hpp:90
SmallBitset next_subset(SmallBitset subset, SmallBitset set)
Returns the next subset of a given subset and `set.
Definition: ADT.hpp:270
Computes the join order using size-based dynamic programming.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Computes the join order using size-based dynamic programming.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Computes the join order using size-based dynamic programming.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Computes the join order using subset-based dynamic programming.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Computes the join order using subset-based dynamic programming.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Implements join ordering using the IK/KBZ algorithm.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
std::vector< std::size_t > linearize(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
Sequence(const std::vector< std::size_t > &linearization, std::size_t begin, std::size_t end)
std::size_t first() const
std::size_t first_index() const
Returns the index in linearization of the first element of this sequence.
Subproblem compute_subproblem_from_sequence(std::size_t begin, std::size_t end)
std::size_t end_
‍the index in the linearization one past the last element of this sequence
Subproblem subproblem() const
std::size_t end() const
std::size_t last() const
std::size_t last_index() const
Returns the index in linearization of the last element of this sequence.
friend std::ostream & operator<<(std::ostream &out, const Sequence &seq)
void dump(std::ostream &out) const
Subproblem S_
‍the subproblem formed by this sequence
std::size_t length() const
std::size_t begin_
‍the index in the linearization of the first element of this sequence
const std::vector< std::size_t > & linearization_
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Computes the join order by enumerating all join orders, including Cartesian products.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
void PlanGen(const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE, PlanTable &PT, Subproblem S) const
An adjacency matrix for a given query graph.
void for_each_CSG_pair_undirected(SmallBitset super, std::function< void(SmallBitset, SmallBitset)> callback) const
Enumerate all pairs of connected subgraphs (CSGs) that are connected by at least one edge.
bool is_connected(SmallBitset S) const
Returns true iff the subproblem S is connected.
AdjacencyMatrix minimum_spanning_forest(std::function< double(std::size_t, std::size_t)> weight) const
Computes the minimum spanning forest for this graph.
SmallBitset neighbors(SmallBitset S) const
Computes the neighbors of S.
virtual std::unique_ptr< DataModel > estimate_join(const QueryGraph &G, const DataModel &left, const DataModel &right, const cnf::CNF &condition) const =0
Form a new DataModel by joining two DataModels.
virtual std::size_t predict_cardinality(const DataModel &data) const =0
The catalog contains all Databases and keeps track of all meta information of the database system.
Definition: Catalog.hpp:215
Database & get_database_in_use()
Returns a reference to the Database that is currently in use, if any.
Definition: Catalog.hpp:295
static Catalog & Get()
Return a reference to the single Catalog instance.
std::unique_ptr< CardinalityEstimator > cardinality_estimator(std::unique_ptr< CardinalityEstimator > CE)
Sets the CardinalityEstimator of this Database.
Definition: Schema.hpp:946
Enumerate all subsets of size k based on superset of size n.
Definition: ADT.hpp:830
static GospersHack enumerate_from(SmallBitset set, uint64_t n)
Create an instance of GospersHack that enumerates all remaining subsets of a set of n elements,...
Definition: ADT.hpp:849
static GospersHack enumerate_all(uint64_t k, uint64_t n)
Create an instance of GospersHack that enumerates all subsets of size k of a set of n elements.
Definition: ADT.hpp:839
void partition(const AdjacencyMatrix &M, Callback &&callback, const Subproblem S) const
Definition: MinCutAGaT.hpp:73
The query graph represents all data sources and joins in a graph structure.
Definition: QueryGraph.hpp:172
std::size_t num_sources() const
Returns the number of DataSources in this graph.
Definition: QueryGraph.hpp:224
const auto & sources() const
Definition: QueryGraph.hpp:257
AdjacencyMatrix & adjacency_matrix()
Definition: QueryGraph.hpp:277
Implements a small and efficient set over integers in the range of 0 to 63 (including).
Definition: ADT.hpp:26
bool is_singleton() const
Definition: ADT.hpp:165
constexpr std::size_t capacity()
Returns the maximum capacity.
Definition: ADT.hpp:159
static SmallBitset All(std::size_t n)
Factory method for creating a SmallBitset with first n bits set.
Definition: ADT.hpp:117
std::size_t size() const
Returns the number of elements in this SmallBitset.
Definition: ADT.hpp:161
static SmallBitset Singleton(std::size_t n)
Factory method for creating a Singleton Smallbitset with n -th bit set.
Definition: ADT.hpp:125
A CNF represents a conjunction of cnf::Clauses.
Definition: CNF.hpp:134
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
void compute_plan(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE, node *begin, node *end) const
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
QueryGraph::Subproblem Subproblem
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
void for_each_join(Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &, const CardinalityEstimator &CE, std::vector< Subproblem > subproblems) const
Enumerate the sequence of graph cuts that yield the smallest subproblems in each step.