mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
HeuristicSearchPlanEnumerator.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <algorithm>
4#include <array>
5#include <boost/container/allocator.hpp>
6#include <boost/container/node_allocator.hpp>
7#include <boost/heap/binomial_heap.hpp>
8#include <boost/heap/fibonacci_heap.hpp>
9#include <boost/heap/pairing_heap.hpp>
10#include <cmath>
11#include <cstdint>
12#include <iostream>
13#include <iterator>
14#include <limits>
19#include <mutable/util/ADT.hpp>
20#include <mutable/util/crtp.hpp>
24#include <numeric>
25#include <ratio>
26#include <type_traits>
27#include <utility>
28#include <vector>
29
30
32namespace m {
33
35namespace pe {
36
38namespace hs {
39
41
42
43/*======================================================================================================================
44 * Helper functions
45 *====================================================================================================================*/
46
47namespace {
48
50inline bool subproblem_lt(Subproblem left, Subproblem right) { return uint64_t(left) < uint64_t(right); };
51
52}
53
54}
55}
56}
57
58
59namespace m::pe::hs {
60
61/*======================================================================================================================
62 * Heuristic Search States
63 *====================================================================================================================*/
64
66namespace search_states {
67
68// #define COUNTERS
69#if !defined(NDEBUG) && !defined(COUNTERS)
70#define COUNTERS
71#endif
72
79template<typename Actual>
80struct Base : crtp<Actual, Base>
81{
82 using crtp<Actual, Base>::actual;
83 using size_type = std::size_t;
84
85 /*----- State counters -------------------------------------------------------------------------------------------*/
86#ifdef COUNTERS
88 {
93
99 { }
100 };
101
102 private:
104
105 public:
111
116
119 state_counters_t old_counters = state_counters_;
120 state_counters_ = new_counters;
121 return old_counters;
122 }
123#else
124 static void RESET_STATE_COUNTERS() { /* nothing to be done */ };
125 static unsigned NUM_STATES_GENERATED() { return 0; }
126 static unsigned NUM_STATES_EXPANDED() { return 0; }
127 static unsigned NUM_STATES_CONSTRUCTED() { return 0; }
128 static unsigned NUM_STATES_DISPOSED() { return 0; }
129
130 static void INCREMENT_NUM_STATES_GENERATED() { /* nothing to be done */ }
131 static void INCREMENT_NUM_STATES_EXPANDED() { /* nothing to be done */ }
132 static void INCREMENT_NUM_STATES_CONSTRUCTED() { /* nothing to be done */ }
133 static void INCREMENT_NUM_STATES_DISPOSED() { /* nothing to be done */ }
134#endif
135
136 /*----- Getters --------------------------------------------------------------------------------------------------*/
137
138 const Actual * parent() const { return actual().parent(); }
139
140 template<typename PlanTable>
141 bool is_bottom(const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF,
142 const CardinalityEstimator &CE) const
143 {
144 return actual().is_bottom(PT, G, M, CF, CE);
145 }
146
147 template<typename PlanTable>
148 bool is_top(const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF,
149 const CardinalityEstimator &CE) const
150 {
151 return actual().is_top(PT, G, M, CF, CE);
152 }
153
155 double g() const { return actual().g(); }
156
157 /*----- Setters --------------------------------------------------------------------------------------------------*/
158
160 double decrease_g(const Actual *new_parent, double new_g) const { return actual().decrease_g(new_parent, new_g); }
161
162 /*----- Comparison -----------------------------------------------------------------------------------------------*/
163
164 bool operator==(const Base &other) const { return actual().operator==(other.actual()); }
165 bool operator!=(const Base &other) const { return actual().operator!=(other.actual()); }
166 bool operator<(const Base &other) const { return actual().operator<(other.actual()); }
167
170 template<typename Callback, typename PlanTable>
171 void for_each_successor(Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
172 const CostFunction &CF, const CardinalityEstimator &CE) const {
173 actual().for_each_successor(std::forward<Callback>(callback), PT, G, M, CF, CE);
174 }
175
176 /*----- Iteration ------------------------------------------------------------------------------------------------*/
177
178 template<typename Callback>
179 void for_each_subproblem(Callback &&callback, const QueryGraph &G) const {
180 actual().template for_each_subproblem<Callback>(std::forward<Callback>(callback), G);
181 }
182
183 /*----- Debugging ------------------------------------------------------------------------------------------------*/
185 void dump(std::ostream &out) const { out << actual() << std::endl; }
186 void dump() const { dump(std::cerr); }
188};
189
190#ifdef COUNTERS
191template<typename Actual>
192typename Base<Actual>::state_counters_t
194#endif
195
196struct SubproblemsArray : Base<SubproblemsArray>
197{
199 using allocator_type = boost::container::node_allocator<Subproblem>;
203
204 private:
207
208 public:
211
212 private:
213 mutable const SubproblemsArray *parent_ = nullptr;
215 mutable double g_;
222
223 /*----- The Big Four and a Half, copy & swap idiom ---------------------------------------------------------------*/
224 public:
225 friend void swap(SubproblemsArray &first, SubproblemsArray &second) {
226 using std::swap;
227 swap(first.parent_, second.parent_);
228 swap(first.g_, second.g_);
229 swap(first.size_, second.size_);
230 swap(first.marked_, second.marked_);
231 swap(first.subproblems_, second.subproblems_);
232 }
233
234 SubproblemsArray() = default;
235
237 template<typename PlanTable>
238 SubproblemsArray(const PlanTable&, const QueryGraph &G, const AdjacencyMatrix&, const CostFunction&,
240 Subproblem marked, Subproblem *subproblems)
241 : parent_(parent)
242 , g_(g)
243 , size_(size)
244 , marked_(marked)
245 , subproblems_(subproblems)
246 {
247 M_insist(parent_ == nullptr or parent_->g() <= this->g(), "cannot have less cost than parent");
248 M_insist(size == 0 or subproblems_);
249 M_insist(size <= G.num_sources());
250 M_insist(std::is_sorted(begin(), end(), subproblem_lt));
252 }
253
256
257 template<typename PlanTable>
258 SubproblemsArray(const SubproblemsArray &other, const PlanTable&, const QueryGraph&, const AdjacencyMatrix&,
259 const CostFunction&, const CardinalityEstimator&)
260 : g_(other.g_)
261 , size_(other.size_)
262 , marked_(other.marked_)
263 , subproblems_(allocator_.allocate(size_))
264 {
267 std::copy(other.begin(), other.end(), this->begin());
268 }
269
273 SubproblemsArray & operator=(SubproblemsArray other) { swap(*this, other); return *this; }
274
278 allocator_.deallocate(subproblems_, size_);
279 }
280
281 /*----- Factory methods ------------------------------------------------------------------------------------------*/
282
283 template<typename PlanTable>
284 static SubproblemsArray Bottom(const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
285 const CostFunction &CF, const CardinalityEstimator &CE)
286 {
287 auto subproblems = allocator_.allocate(G.num_sources());
288 for (uint64_t i = 0; i != G.num_sources(); ++i)
289 subproblems[i] = Subproblem::Singleton(i);
290 return SubproblemsArray(
291 /* Context= */ PT, G, M, CF, CE,
292 /* parent= */ nullptr,
293 /* cost= */ 0.,
294 /* size= */ G.num_sources(),
295 /* marked= */ Subproblem(1),
296 /* subproblems= */ subproblems
297 );
298 }
299
300 template<typename PlanTable>
301 static SubproblemsArray Top(const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
302 const CostFunction &CF, const CardinalityEstimator &CE)
303 {
304 const Subproblem All = Subproblem::All(G.num_sources());
305 auto subproblems = allocator_.allocate(1);
306 subproblems[0] = All;
307 return SubproblemsArray(
308 /* Context= */ PT, G, M, CF, CE,
309 /* parent= */ nullptr,
310 /* cost= */ 0.,
311 /* size= */ 1,
312 /* marked= */ All,
313 /* subproblems= */ subproblems
314 );
315 }
316
317 /*----- Getters --------------------------------------------------------------------------------------------------*/
318
319 const SubproblemsArray * parent() const { return parent_; }
320
321 template<typename PlanTable>
322 bool is_bottom(const PlanTable&, const QueryGraph &G, const AdjacencyMatrix&, const CostFunction&,
323 const CardinalityEstimator&) const
324 {
325 return size() == G.num_sources();
326 }
327
328 template<typename PlanTable>
329 bool is_top(const PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&,
330 const CardinalityEstimator&) const
331 {
332 return size() <= 1;
333 }
334
335 double g() const { return g_; }
336 Subproblem marked() const { return marked_; }
337 Subproblem mark(const Subproblem new_marked) { const Subproblem old = marked_; marked_ = new_marked; return old; }
338 void decrease_g(const SubproblemsArray *new_parent, double new_g) const {
339 M_insist(new_parent != this);
340 M_insist(new_parent->g() <= new_g);
341 M_insist(new_g <= this->g());
342 parent_ = new_parent;
343 g_ = new_g;
344 }
345 size_type size() const { return size_; }
346 Subproblem operator[](std::size_t idx) const { M_insist(idx < size_); return subproblems_[idx]; }
347
348 template<typename PlanTable>
349 static unsigned num_partitions(const PlanTable&, const QueryGraph &G, const AdjacencyMatrix&, const CostFunction&,
351 {
352 return G.num_sources();
353 }
354
355 template<typename PlanTable>
356 unsigned partition_id(const PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&,
357 const CardinalityEstimator&) const
358 {
359 M_insist(size() > 0);
360 return size() - 1;
361 }
362
363 /*----- Iteration ------------------------------------------------------------------------------------------------*/
364
366 iterator end() { return begin() + size(); }
367 const_iterator begin() const { return subproblems_; };
368 const_iterator end() const { return begin() + size(); }
369 const_iterator cbegin() const { return begin(); };
370 const_iterator cend() const { return end(); }
371
372 template<typename Callback>
373 void for_each_subproblem(Callback &&callback, const QueryGraph&) const {
374 for (Subproblem S : *this)
375 callback(S);
376 }
377
378 /*----- Comparison -----------------------------------------------------------------------------------------------*/
379
381 bool operator==(const SubproblemsArray &other) const {
382 if (this->size() != other.size()) return false;
383 M_insist(this->size() == other.size());
384 auto this_it = this->cbegin();
385 auto other_it = other.cbegin();
386 for (; this_it != this->cend(); ++this_it, ++other_it) {
387 if (*this_it != *other_it)
388 return false;
389 }
390 return true;
391 }
392
393 bool operator!=(const SubproblemsArray &other) const { return not operator==(other); }
394
395 bool operator<(const SubproblemsArray &other) const {
396 return std::lexicographical_compare(cbegin(), cend(), other.cbegin(), other.cend(), subproblem_lt);
397 }
398
400 friend std::ostream & operator<<(std::ostream &out, const SubproblemsArray &S) {
401 out << "g = " << S.g() << ", [";
402 for (auto it = S.cbegin(); it != S.cend(); ++it) {
403 if (it != S.cbegin()) out << ", ";
404 it->print_fixed_length(out, 15);
405 }
406 return out << ']';
407 }
408
409 void dump(std::ostream &out) const { out << *this << std::endl; }
410 void dump() const { dump(std::cerr); }
412};
413
414
415struct SubproblemTableBottomUp : Base<SubproblemTableBottomUp>
416{
418 using allocator_type = boost::container::node_allocator<Subproblem>;
420
421 private:
424
425 public:
428
429 private:
431 mutable double g_;
437 Subproblem *table_ = nullptr;
438
439 template<bool IsConst>
441 {
442 static constexpr bool is_const = IsConst;
443
444 using reference = std::conditional_t<is_const, const Subproblem&, Subproblem&>;
445 using pointer = std::conditional_t<is_const, const Subproblem*, Subproblem*>;
446
447 private:
453
454 public:
456 : table_(table) , size_(size) , idx_(idx), unique_(unique)
457 { }
458
460 M_insist(unique_ < size_, "out of bounds");
461 ++unique_; // count unique subproblems
462 X_ |= table_[idx_++]; // remember unique subproblem
463 if (unique_ < size_) // not at end
464 while (table_[idx_].is_subset(X_)) ++idx_; // find next unique subproblem
465 return *this;
466 }
467
468 bool operator==(the_iterator other) const {
469 return this->table_ == other.table_ and this->unique_ == other.unique_;
470 }
471
472 bool operator!=(the_iterator other) const { return not operator==(other); }
473
474 the_iterator operator++(int) { the_iterator clone = *this; operator++(); return clone; }
475
476 reference operator*() const { return table_[idx_]; }
477 pointer operator->() const { return &operator*(); }
478 };
479 public:
482
483
484 /*----- The Big Four and a Half, copy & swap idiom ---------------------------------------------------------------*/
485 public:
487 using std::swap;
488 swap(first.g_, second.g_);
489 swap(first.size_, second.size_);
490 swap(first.marker_, second.marker_);
491 swap(first.table_, second.table_);
492 }
493
495
497 template<typename PlanTable>
498 SubproblemTableBottomUp(const PlanTable&, const QueryGraph &G, const AdjacencyMatrix&, const CostFunction&,
500 : g_(g)
501 , size_(size)
502 , marker_(marker)
503 , table_(table)
504 {
505 M_insist(size == 0 or table);
506 M_insist(size <= G.num_sources());
508 }
509
511 template<typename PlanTable>
512 SubproblemTableBottomUp(const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF,
513 const CardinalityEstimator &CE)
514 : SubproblemTableBottomUp(PT, G, M, CF, CE, 0., G.num_sources(), Subproblem(), allocator_.allocate(G.num_sources()))
515 {
516 for (uint64_t i = 0; i != G.num_sources(); ++i)
518 }
519
522
523 template<typename PlanTable>
524 SubproblemTableBottomUp(const SubproblemTableBottomUp &other, const PlanTable&, const QueryGraph &G,
526 : g_(other.g_)
527 , size_(other.size_)
528 , marker_(other.marker_)
529 , table_(allocator_.allocate(size_))
530 {
533 std::copy_n(other.table_, G.num_sources(), this->table_);
534 }
535
539 SubproblemTableBottomUp & operator=(SubproblemTableBottomUp other) { swap(*this, other); return *this; }
540
544 allocator_.deallocate(table_, size_);
545 }
546
547 /*----- Getters --------------------------------------------------------------------------------------------------*/
548
549 template<typename PlanTable>
550 bool is_goal(const PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&,
551 const CardinalityEstimator&) const
552 {
553 return size() <= 1;
554 }
555 double g() const { return g_; }
556 Subproblem marker() const { return marker_; }
557 void decrease_g(double new_g) const { M_insist(new_g <= g_); g_ = new_g; }
558 size_type size() const { return size_; }
559 Subproblem & operator[](std::size_t idx) { M_insist(idx < size_); return table_[idx]; }
560 const Subproblem & operator[](std::size_t idx) const { return table_[idx]; }
561
562 template<typename PlanTable>
563 static unsigned num_partitions(const PlanTable&, const QueryGraph &G, const AdjacencyMatrix&, const CostFunction&,
565 {
566 return G.num_sources();
567 }
568
569 template<typename PlanTable>
570 unsigned partition_id(const PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&,
571 const CardinalityEstimator&) const
572 {
573 M_insist(size() > 0);
574 return size() - 1;
575 }
576
577 template<typename Callback>
578 void for_each_subproblem(Callback &&callback, const QueryGraph&) const {
579 for (Subproblem S : *this)
580 callback(S);
581 }
582
583 /*----- Iteration ------------------------------------------------------------------------------------------------*/
584
585 iterator begin() { return iterator(table_, size_, 0, 0); }
586 iterator end() { return iterator(table_, size_, 0, size_); }
587 const_iterator begin() const { return const_iterator(table_, size_, 0, 0); }
589 const_iterator cbegin() const { return begin(); }
590 const_iterator cend() const { return end(); }
591
592 /*----- Comparison -----------------------------------------------------------------------------------------------*/
593
595 bool operator==(const SubproblemTableBottomUp &other) const {
596 if (this->size() != other.size()) return false;
597 M_insist(this->size() == other.size());
598
600 std::size_t i = 0, j = 0;
601 while (i != size()) {
602 Subproblem T = this->table_[j];
603 if (T.is_subset(X)) { // subproblem already seen
604 ++j;
605 continue;
606 }
607 /* New unique subproblem, compare. */
608 Subproblem O = this->table_[j];
609 if (T != O) return false;
610 ++i; // count unique subproblems
611 X |= T; // remember unique subproblem
612 }
613 return true;
614 }
615
616 bool operator!=(const SubproblemTableBottomUp &other) const { return not operator==(other); }
617
619 M_unreachable("not implemented");
620 }
621
623 friend std::ostream & operator<<(std::ostream &out, const SubproblemTableBottomUp &S) {
624 out << "g = " << S.g() << ", [";
625 for (auto it = S.cbegin(); it != S.cend(); ++it) {
626 if (it != S.cbegin()) out << ", ";
627 it->print_fixed_length(out, 15);
628 }
629 return out << ']';
630 }
631
632 void dump(std::ostream &out) const { out << *this << std::endl; }
633 void dump() const { dump(std::cerr); }
635};
636
637
638struct EdgesBottomUp : Base<EdgesBottomUp>
639{
641 using allocator_type = boost::container::node_allocator<unsigned>;
643 using iterator = unsigned*;
644 using const_iterator = const unsigned*;
645
646 private:
649
650 public:
653
654 private:
658 mutable double g_;
662 unsigned *joins_ = nullptr;
663
664 /*----- The Big Four and a Half, copy & swap idiom ---------------------------------------------------------------*/
665 public:
666 friend void swap(EdgesBottomUp &first, EdgesBottomUp &second) {
667 using std::swap;
668 swap(first.g_, second.g_);
669 swap(first.num_joins_, second.num_joins_);
671 swap(first.joins_, second.joins_);
672 }
673
674 EdgesBottomUp() = default;
675
678 unsigned *joins)
680 , g_(g)
682 , joins_(joins)
683 {
684 M_insist(num_joins_ == 0 or bool(joins_));
685 M_insist(std::is_sorted(cbegin(), cend()), "joins must be sorted by index");
686#ifdef COUNTERS
687 if (num_joins_)
689#endif
690 }
691
695 { }
696
698 template<typename It>
701 , g_(g)
702 , num_joins_(std::distance(begin, end))
703 , joins_(allocator_.allocate(num_joins_))
704 {
705 M_insist(begin != end);
706 std::copy(begin, end, this->begin());
707 M_insist(std::is_sorted(cbegin(), cend()), "joins must be sorted by index");
708#ifdef COUNTERS
709 if (num_joins_)
711#endif
712 }
713
715 explicit EdgesBottomUp(const EdgesBottomUp &other)
717 , g_(other.g_)
718 , num_joins_(other.num_joins_)
719 , joins_(allocator_.allocate(num_joins_))
720 {
721 M_insist(bool(joins_));
722 M_insist(std::is_sorted(cbegin(), cend()), "joins must be sorted by index");
723 std::copy(other.begin(), other.end(), this->begin());
724#ifdef COUNTERS
725 if (num_joins_)
727#endif
728 }
729
732 { swap(*this, other); }
734 EdgesBottomUp & operator=(EdgesBottomUp other) { swap(*this, other); return *this; }
735
738#ifdef COUNTERS
739 if (num_joins_) {
740 M_insist(bool(joins_));
742 }
743#endif
744 allocator_.deallocate(joins_, num_joins_);
745 }
746
747 /*----- Factory methods. -----------------------------------------------------------------------------------------*/
748
750 return EdgesBottomUp(G.num_sources() - 1, 0, nullptr);
751 }
752
753 /*----- Getters --------------------------------------------------------------------------------------------------*/
754
755 template<typename PlanTable>
756 bool is_goal(const PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&,
757 const CardinalityEstimator&) const
758 {
759 return num_joins() == num_joins_to_goal_;
760 }
761 double g() const { return g_; }
762 void decrease_g(double new_g) const { M_insist(new_g <= g_); g_ = new_g; }
763 size_type num_joins() const { return num_joins_; }
766 return G.num_sources() - num_joins();
767 }
769 unsigned operator[](std::size_t idx) const { M_insist(idx < num_joins()); return joins_[idx]; }
770
771 /*----- Iteration ------------------------------------------------------------------------------------------------*/
772
773 iterator begin() { return joins_; };
774 iterator end() { return begin() + num_joins(); }
775 const_iterator begin() const { return joins_; };
776 const_iterator end() const { return begin() + num_joins(); }
777 const_iterator cbegin() const { return begin(); };
778 const_iterator cend() const { return end(); }
779
780 template<typename Callback>
781 void for_each_subproblem(Callback &&callback, const QueryGraph &G) const {
782 Subproblem subproblems[G.num_sources()];
783 uint8_t datasource_to_subproblem[G.num_sources()];
784 compute_datasource_to_subproblem_index(G, subproblems, datasource_to_subproblem);
785
786 for (Subproblem *it = subproblems, *end = subproblems + G.num_sources(); it != end; ++it) {
787 if (not it->empty())
788 callback(*it);
789 }
790 }
791
792 /*----- Comparison -----------------------------------------------------------------------------------------------*/
793
795 bool operator==(const EdgesBottomUp &other) const {
796 if (this->num_joins() != other.num_joins()) return false;
797 M_insist(this->num_joins() == other.num_joins());
798 auto this_it = this->cbegin();
799 auto other_it = other.cbegin();
800 for (; this_it != this->cend(); ++this_it, ++other_it) {
801 if (*this_it != *other_it)
802 return false;
803 }
804 return true;
805 }
806
807 bool operator!=(const EdgesBottomUp &other) const { return not operator==(other); }
808
809 /*----- Edge calculations ----------------------------------------------------------------------------------------*/
810
814 uint8_t *datasource_to_subproblem) const
815 {
816 /*----- Initialize datasource_to_subproblem reverse index. -----*/
817 for (std::size_t idx = 0; idx != G.num_sources(); ++idx) {
818 new (&datasource_to_subproblem[idx]) int8_t(idx);
819 new (&subproblems[idx]) Subproblem(Subproblem::Singleton(idx));
820 }
821
822 /*----- Update index with joins performed so far. -----*/
823 for (unsigned join_idx : *this) {
824 const Join &join = *G.joins()[join_idx];
825 const auto &sources = join.sources();
826 const unsigned left_idx = datasource_to_subproblem[sources[0].get().id()];
827 const unsigned right_idx = datasource_to_subproblem[sources[1].get().id()];
828 subproblems[left_idx] |= subproblems[right_idx];
829 for (auto id : subproblems[right_idx])
830 datasource_to_subproblem[id] = left_idx;
831 subproblems[right_idx] = Subproblem();
832 }
833
834#ifndef NDEBUG
835 for (std::size_t idx = 0; idx != G.num_sources(); ++idx) {
836 Subproblem S = subproblems[datasource_to_subproblem[idx]];
837 M_insist(not S.empty(), "reverse index must never point to an empty subproblem");
838 }
840 Subproblem combined;
841 unsigned num_subproblems_nonempty = 0;
842 for (std::size_t idx = 0; idx != G.num_sources(); ++idx) {
843 Subproblem S = subproblems[idx];
844 if (S.empty()) continue;
845 ++num_subproblems_nonempty;
846 M_insist((S & combined).empty(), "current subproblem must be disjoint from all others");
847 combined |= S;
848 }
849 M_insist(num_subproblems(G) == num_subproblems_nonempty);
850 M_insist(All == combined, "must not loose a relation");
851#endif
852 }
853
854
855 /*----- Debugging ------------------------------------------------------------------------------------------------*/
857 friend std::ostream & operator<<(std::ostream &out, const EdgesBottomUp &S) {
858 out << "g = " << S.g() << ", [";
859 for (auto it = S.cbegin(); it != S.cend(); ++it) {
860 if (it != S.cbegin()) out << ", ";
861 out << *it;
862 }
863 return out << ']';
864 }
865
866 void dump(std::ostream &out) const { out << *this << std::endl; }
867 void dump() const { dump(std::cerr); }
869};
870
871
872struct EdgePtrBottomUp : Base<EdgePtrBottomUp>
873{
876
877 private:
878 template<bool IsConst>
880 {
881 static constexpr bool is_const = IsConst;
882
883 using pointer_type = std::conditional_t<is_const, const EdgePtrBottomUp*, EdgePtrBottomUp*>;
884 using reference_type = std::conditional_t<is_const, const EdgePtrBottomUp&, EdgePtrBottomUp&>;
885
886 private:
888
889 public:
890 the_iterator() = default;
891 the_iterator(pointer_type state) : state_(state) { }
892
893 bool operator==(the_iterator other) const { return this->state_ == other.state_; }
894 bool operator!=(the_iterator other) const { return not operator==(other); }
895
898 state_ = state_->parent();
899 return *this;
900 }
901
903 the_iterator clone(this->state_);
904 operator++();
905 return clone;
906 }
907
910 };
911 public:
914
915 private:
916 /*----- Scratchpad -----*/
918 {
919 friend void swap(Scratchpad &first, Scratchpad &second) {
920 using std::swap;
921 swap(first.owner, second.owner);
922 swap(first.subproblems, second.subproblems);
924 }
925
926 const EdgePtrBottomUp *owner = nullptr;
928 uint8_t *datasource_to_subproblem = nullptr;
929
930 Scratchpad() = default;
932 : subproblems(new Subproblem[G.num_sources()])
933 , datasource_to_subproblem(new uint8_t[G.num_sources()])
934 { }
935
936 Scratchpad(const Scratchpad&) = delete;
937 Scratchpad(Scratchpad &&other) : Scratchpad() { swap(*this, other); }
938
940 delete[] subproblems;
942 owner = nullptr;
943 subproblems = nullptr;
944 datasource_to_subproblem = nullptr;
945 }
946
947 Scratchpad & operator=(Scratchpad other) { swap(*this, other); return *this; }
948
949 bool is_allocated() const { return subproblems != nullptr; }
950 bool owned_by(const EdgePtrBottomUp *state) const { return owner == state; }
951
952 void make_owner(const EdgePtrBottomUp *state, const QueryGraph &G) {
954 if (owned_by(state)) return; // nothing to be done
955 owner = state;
957 }
958 };
960
961 private:
962 mutable double g_;
963 const EdgePtrBottomUp *parent_ = nullptr;
964 unsigned join_id_;
965 unsigned num_joins_;
966
967 /*----- The Big Four and a Half, copy & swap idiom ---------------------------------------------------------------*/
968 public:
969 friend void swap(EdgePtrBottomUp &first, EdgePtrBottomUp &second) {
970 using std::swap;
971 swap(first.g_, second.g_);
972 swap(first.parent_, second.parent_);
973 swap(first.join_id_, second.join_id_);
974 swap(first.num_joins_, second.num_joins_);
975 }
976
979 : g_(0)
980 , num_joins_(0)
981 {
983 }
984
986 EdgePtrBottomUp(const EdgePtrBottomUp *parent, unsigned num_joins, double g, unsigned join_id)
987 : g_(g)
988 , parent_(parent)
991 {
993 }
994
996 if (parent_)
998 }
999
1003 EdgePtrBottomUp(EdgePtrBottomUp &&other) { swap(*this, other); }
1005 EdgePtrBottomUp & operator=(EdgePtrBottomUp other) { swap(*this, other); return *this; }
1006
1007 /*----- Factory methods. -----------------------------------------------------------------------------------------*/
1008
1010 if (not scratchpad_.is_allocated())
1012 return EdgePtrBottomUp(G);
1013 }
1014
1015 /*----- Getters --------------------------------------------------------------------------------------------------*/
1016
1017 template<typename PlanTable>
1018 bool is_goal(const PlanTable&, const QueryGraph &G, const AdjacencyMatrix&, const CostFunction&,
1019 const CardinalityEstimator&) const
1020 {
1021 return num_joins() == G.num_sources() - 1;
1022 }
1023
1024 double g() const { return g_; }
1025 void decrease_g(double new_g) const { M_insist(new_g <= g_); g_ = new_g; }
1026 const EdgePtrBottomUp * parent() const { return parent_; }
1027 unsigned join_id() const { M_notnull(parent_); return join_id_; }
1028 unsigned num_joins() const { return num_joins_; }
1029 unsigned num_joins_remaining(const QueryGraph &G) const { return G.num_sources() - 1 - num_joins(); }
1030
1031 /*----- Iteration ------------------------------------------------------------------------------------------------*/
1032
1033 iterator begin() { return iterator(this); }
1034 iterator end() { return iterator(); }
1035 const_iterator begin() const { return const_iterator(this); }
1036 const_iterator end() const { return const_iterator(); }
1037 const_iterator cbegin() const { return begin(); }
1038 const_iterator cend() const { return end(); }
1039
1040 template<typename Callback>
1041 void for_each_subproblem(Callback &&callback, const QueryGraph &G) const {
1042 Subproblem subproblems[G.num_sources()];
1043 uint8_t datasource_to_subproblem[G.num_sources()];
1044 compute_datasource_to_subproblem_index(G, subproblems, datasource_to_subproblem);
1045
1046 for (Subproblem *it = subproblems, *end = subproblems + G.num_sources(); it != end; ++it) {
1047 if (not it->empty())
1048 callback(*it);
1049 }
1050 }
1051
1052 /*----- Comparison -----------------------------------------------------------------------------------------------*/
1053
1055 bool operator==(const EdgePtrBottomUp &other) const {
1056 if (this->num_joins() != other.num_joins())
1057 return false;
1058
1059 /*----- Collect all joins of `this`. -----*/
1060 unsigned this_joins[num_joins()];
1061 auto ptr = this_joins;
1062 for (auto it = this->cbegin(); it->parent(); ++it, ++ptr)
1063 *ptr = it->join_id();
1064
1065 /*----- Compare with joins of `other`. -----*/
1066 const auto end = this_joins + num_joins();
1067 for (auto it = other.cbegin(); it->parent(); ++it) {
1068 auto pos = std::find(this_joins, end, it->join_id());
1069 if (pos == end)
1070 return false;
1071 }
1072 return true;
1073 }
1074
1075 bool operator!=(const EdgePtrBottomUp &other) const { return not operator==(other); }
1076
1077 /*----- Edge calculations ----------------------------------------------------------------------------------------*/
1078
1082 uint8_t *datasource_to_subproblem) const
1083 {
1084 /*----- Initialize datasource_to_subproblem reverse index. -----*/
1085 for (std::size_t idx = 0; idx != G.num_sources(); ++idx) {
1086 new (&datasource_to_subproblem[idx]) int8_t(idx);
1087 new (&subproblems[idx]) Subproblem(Subproblem::Singleton(idx));
1088 }
1089
1090 /*----- Update index with joins performed so far. -----*/
1091 const EdgePtrBottomUp *runner = this;
1092 while (runner->parent()) {
1093 const Join &join = *G.joins()[runner->join_id()];
1094 const auto &sources = join.sources();
1095 const unsigned left_idx = datasource_to_subproblem[sources[0].get().id()];
1096 const unsigned right_idx = datasource_to_subproblem[sources[1].get().id()];
1097 subproblems[left_idx] |= subproblems[right_idx];
1098 for (auto id : subproblems[right_idx])
1099 datasource_to_subproblem[id] = left_idx;
1100 subproblems[right_idx] = Subproblem();
1101 runner = runner->parent();
1102 }
1103
1104#ifndef NDEBUG
1105 for (std::size_t idx = 0; idx != G.num_sources(); ++idx) {
1106 Subproblem S = subproblems[datasource_to_subproblem[idx]];
1107 M_insist(not S.empty(), "reverse index must never point to an empty subproblem");
1108 }
1110 Subproblem combined;
1111 for (std::size_t idx = 0; idx != G.num_sources(); ++idx) {
1112 Subproblem S = subproblems[idx];
1113 if (S.empty()) continue;
1114 M_insist((S & combined).empty(), "current subproblem must be disjoint from all others");
1115 combined |= S;
1116 }
1117 M_insist(All == combined, "must not loose a relation");
1118#endif
1119 }
1120
1121
1122 /*----- Debugging ------------------------------------------------------------------------------------------------*/
1124 friend std::ostream & operator<<(std::ostream &out, const EdgePtrBottomUp &S) {
1125 out << "g = " << S.g() << ", [";
1126 for (auto it = S.cbegin(); it->parent(); ++it) {
1127 if (it != S.cbegin()) out << ", ";
1128 out << it->join_id();
1129 }
1130 return out << ']';
1131 }
1132
1133 void dump(std::ostream &out) const { out << *this << std::endl; }
1134 void dump() const { dump(std::cerr); }
1136};
1137
1138}
1139
1140}
1141
1142
1143namespace std {
1144
1145template<>
1146struct hash<m::pe::hs::search_states::SubproblemsArray>
1147{
1149 /* Rolling hash with multiplier taken from [1] where the moduli is 2^64.
1150 * [1] http://www.ams.org/mcom/1999-68-225/S0025-5718-99-00996-5/S0025-5718-99-00996-5.pdf */
1151 uint64_t hash = 0;
1152 for (m::pe::hs::Subproblem s : state) {
1153 hash = hash ^ uint64_t(s);
1154 hash = hash * 1181783497276652981UL + 4292484099903637661UL;
1155 }
1156 return hash;
1157 }
1158};
1159
1160template<>
1161struct hash<m::pe::hs::search_states::SubproblemTableBottomUp>
1162{
1164 /* Rolling hash with multiplier taken from [1] where the moduli is 2^64.
1165 * [1] http://www.ams.org/mcom/1999-68-225/S0025-5718-99-00996-5/S0025-5718-99-00996-5.pdf */
1166 uint64_t hash = 0;
1167 for (m::pe::hs::Subproblem s : state) {
1168 hash = hash ^ uint64_t(s);
1169 hash = hash * 1181783497276652981UL + 4292484099903637661UL;
1170 }
1171 return hash;
1172 }
1173};
1174
1175template<>
1176struct hash<m::pe::hs::search_states::EdgesBottomUp>
1177{
1179 /* Rolling hash with multiplier taken from [1] where the moduli is 2^64.
1180 * [1] http://www.ams.org/mcom/1999-68-225/S0025-5718-99-00996-5/S0025-5718-99-00996-5.pdf */
1181 uint64_t hash = 0;
1182 for (unsigned join_idx : state) {
1183 hash = hash ^ join_idx;
1184 hash = hash * 1181783497276652981UL + 4292484099903637661UL;
1185 }
1186 return hash;
1187 }
1188};
1189
1190template<>
1191struct hash<m::pe::hs::search_states::EdgePtrBottomUp>
1192{
1194 /*----- Collect all joins in sorted order. -----*/
1195 unsigned joins[state.num_joins()];
1196 unsigned *ptr = joins;
1197 for (auto it = state.cbegin(); it->parent(); ++it, ++ptr)
1198 *ptr = it->join_id();
1199 std::sort(joins, joins + state.num_joins());
1200
1201 /* Rolling hash with multiplier taken from [1] where the moduli is 2^64.
1202 * [1] http://www.ams.org/mcom/1999-68-225/S0025-5718-99-00996-5/S0025-5718-99-00996-5.pdf */
1203 uint64_t hash = 0;
1204 for (auto it = joins; it != joins + state.num_joins(); ++it) {
1205 hash = hash ^ *it;
1206 hash = hash * 1181783497276652981UL + 4292484099903637661UL;
1207 }
1208 return hash;
1209 }
1210};
1211
1212}
1213
1214
1215namespace m::pe::hs {
1216
1217/*======================================================================================================================
1218 * Heuristic Search State Expansions
1219 *====================================================================================================================*/
1220
1222namespace expansions {
1223
1224using namespace m::pe::hs;
1225using namespace m::pe::hs::search_states;
1226
1227
1229{
1230 template<typename State, typename PlanTable>
1231 static State Start(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF,
1232 const CardinalityEstimator &CE) {
1233 return State::Bottom(PT, G, M, CF, CE);
1234 }
1235
1236 template<typename State, typename PlanTable>
1237 static bool is_goal(const State &state, const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
1238 const CostFunction &CF, const CardinalityEstimator &CE)
1239 {
1240 return state.is_top(PT, G, M, CF, CE);
1241 }
1242
1243 template<typename State, typename PlanTable>
1244 void reset_marked(State &state, const PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&,
1245 const CardinalityEstimator&)
1246 {
1247 state.mark(Subproblem(1UL));
1248 }
1249};
1250
1252{
1254 using direction::is_goal;
1255
1256 template<typename Callback, typename PlanTable>
1257 void operator()(const SubproblemsArray &state, Callback &&callback, PlanTable &PT,
1258 const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF,
1259 const CardinalityEstimator &CE) const
1260 {
1262 M_insist(not is_goal(state, PT, G, M, CF, CE), "cannot expand goal");
1263#ifndef NDEBUG
1264 bool has_successor = false;
1265#endif
1266
1267 const Subproblem marked = state.marked();
1268 const Subproblem All = Subproblem::All(G.num_sources());
1269
1270 /* Enumerate all potential join pairs and check whether they are connected. */
1271 for (auto outer_it = state.cbegin(), outer_end = std::prev(state.cend()); outer_it != outer_end; ++outer_it)
1272 {
1273 const auto neighbors = M.neighbors(*outer_it);
1274 for (auto inner_it = std::next(outer_it); inner_it != state.cend(); ++inner_it) {
1275 M_insist(subproblem_lt(*outer_it, *inner_it), "subproblems must be sorted");
1276 M_insist((*outer_it & *inner_it).empty(), "subproblems must not overlap");
1277
1278 if (subproblem_lt(*inner_it, marked)) // *inner_it < marked, implies *outer_it < marked
1279 continue; // prune symmetrical paths
1280
1281 if (neighbors & *inner_it) { // inner and outer are joinable.
1282 /* Compute joined subproblem. */
1283 const Subproblem joined = *outer_it | *inner_it;
1284
1285 /* Compute new subproblems after join */
1286 Subproblem *subproblems = state.get_allocator().allocate(state.size() - 1);
1287 Subproblem *ptr = subproblems;
1288 for (auto it = state.cbegin(); it != state.cend(); ++it) {
1289 if (it == outer_it) continue; // skip outer
1290 else if (it == inner_it) new (ptr++) Subproblem(joined); // replace inner
1291 else new (ptr++) Subproblem(*it);
1292 }
1293 M_insist(std::is_sorted(subproblems, subproblems + state.size() - 1, subproblem_lt));
1294
1295 /* Compute total cost. */
1296 cnf::CNF condition; // TODO use join condition
1297 if (not PT[joined].model) {
1298 auto &model_left = *PT[*outer_it].model;
1299 auto &model_right = *PT[*inner_it].model;
1300 PT[joined].model = CE.estimate_join(G, model_left, model_right, condition);
1301 }
1302 /* The cost of the final join is always the size of the result set, and hence the same for all
1303 * plans. We therefore omit this cost, as otherwise goal states might be artificially postponed in
1304 * the priority queue. */
1305 const double action_cost = joined == All ? 0 : CE.predict_cardinality(*PT[joined].model);
1306
1307 /* Create new search state. */
1309 /* Context= */ PT, G, M, CF, CE,
1310 /* parent= */ &state,
1311 /* g= */ state.g() + action_cost,
1312 /* size= */ state.size() - 1,
1313 /* marked= */ joined,
1314 /* subproblems= */ subproblems
1315 );
1317 callback(std::move(S));
1318#ifndef NDEBUG
1319 has_successor = true;
1320#endif
1321 }
1322 }
1323 }
1324#ifndef NDEBUG
1325 M_insist(has_successor, "must expand to at least one successor");
1326#endif
1327 }
1328
1329 template<typename Callback, typename PlanTable>
1330 void operator()(const SubproblemTableBottomUp &state, Callback &&callback, PlanTable &PT,
1331 const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF,
1332 const CardinalityEstimator &CE) const
1333 {
1335
1336 /*----- Enumerate all left sides. -----*/
1337 const Subproblem All = Subproblem::All(G.num_sources());
1338 Subproblem X;
1339 std::size_t i = 0;
1340 while (X != All) {
1341 const Subproblem L = state[i++];
1342 if (L.is_subset(X)) continue;
1343 X |= L; // remember left side
1344
1345 Subproblem N = M.neighbors(L) - X;
1346 /*----- Enumerate all right sides of left side. -----*/
1347 while (not N.empty()) {
1348 auto it = N.begin();
1349 const Subproblem R = state[*it];
1350 N -= R;
1351
1352 // M_insist(uint64_t(L) < uint64_t(R));
1353 M_insist(M.is_connected(L, R));
1354 M_insist((L & R).empty());
1355
1356 /* Compute joined subproblem. */
1357 const Subproblem joined = L|R;
1358
1359 if (subproblem_lt(joined, state.marker())) continue; // prune symmetrical paths
1360
1361 /* Compute new subproblem table after join. */
1362 auto table = state.get_allocator().allocate(G.num_sources());
1363
1364#if 1
1365 for (std::size_t j = 0; j != G.num_sources(); ++j)
1366 table[j] = joined[j] ? joined : state[j];
1367#else
1368 std::copy_n(&state[0], G.num_sources(), table);
1369 for (std::size_t i : joined)
1370 table[i] = joined;
1371#endif
1372
1373 /* Compute total cost. */
1374 cnf::CNF condition; // TODO use join condition
1375 PT.update(G, CE, CF, L, R, condition);
1376
1377 /* Compute action cost. */
1378 const double total_cost = CF.calculate_join_cost(G, PT, CE, L, R, condition);
1379 const double action_cost = total_cost - (PT[L].cost + PT[R].cost);
1380
1381 /* Create new search state. */
1383 /* Context= */ PT, G, M, CF, CE,
1384 /* g= */ state.g() + action_cost,
1385 /* size= */ state.size() - 1,
1386 /* mark= */ joined,
1387 /* table_ */ table
1388 );
1390 callback(std::move(S));
1391 }
1392 }
1393 }
1394
1395 template<typename Callback, typename PlanTable>
1396 void operator()(const EdgesBottomUp &state, Callback &&callback, PlanTable &PT,
1397 const QueryGraph &G, const AdjacencyMatrix&, const CostFunction &CF,
1398 const CardinalityEstimator &CE) const
1399 {
1401 M_insist(std::is_sorted(state.cbegin(), state.cend()), "joins must be sorted by index");
1402
1403 /*----- Compute datasource to subproblem reverse index. -----*/
1404 Subproblem subproblems[G.num_sources()];
1405 uint8_t datasource_to_subproblem[G.num_sources()];
1406 state.compute_datasource_to_subproblem_index(G, subproblems, datasource_to_subproblem);
1407
1408 /*----- Initialize join matrix. -----*/
1409 const unsigned size_of_join_matrix = G.num_sources() * (G.num_sources() - 1) / 2;
1411 bool join_matrix[size_of_join_matrix];
1412 std::fill_n(join_matrix, size_of_join_matrix, false);
1413 auto joined = [&G, size_of_join_matrix](bool *matrix, unsigned row, unsigned col) -> bool& {
1414 const unsigned x = std::min(row, col);
1415 const unsigned y = std::max(row, col);
1416 M_insist(y < G.num_sources());
1417 M_insist(x < y);
1418 M_insist(y * (y - 1) / 2 + x < size_of_join_matrix);
1419 return matrix[ y * (y - 1) / 2 + x ];
1420 };
1421
1422 /*----- Enumerate all joins that can still be performed. -----*/
1423 unsigned *joins = static_cast<unsigned*>(alloca(sizeof(unsigned[state.num_joins() + 1])));
1424 std::copy(state.cbegin(), state.cend(), joins + 1);
1425 std::size_t j = 0;
1426 joins[j] = 0;
1427 M_insist(std::is_sorted(joins, joins + state.num_joins() + 1));
1428
1429 /*---- Find the first gap in the joins. -----*/
1430 while (j < state.num_joins() and joins[j] == joins[j+1]) {
1431 ++j;
1432 ++joins[j];
1433 }
1434 M_insist(j == state.num_joins() or joins[j] < joins[j+1]);
1435 M_insist(std::is_sorted(joins, joins + state.num_joins() + 1));
1436
1437 for (;;) {
1438 M_insist(j <= state.num_joins(), "j out of bounds");
1439 M_insist(j == state.num_joins() or joins[j] < joins[j+1], "invalid position of j");
1440
1441 const Join &join = *G.joins()[joins[j]];
1442 const auto &sources = join.sources();
1443
1444 /*----- Check whether the join is subsumed by joins in `state`. -----*/
1445 const unsigned left = datasource_to_subproblem[sources[0].get().id()];
1446 const unsigned right = datasource_to_subproblem[sources[1].get().id()];
1447 if (left == right) goto next; // the data sources joined already belong to the same subproblem
1448
1449 /*----- Check whether the join is subsumed by a previously considered join. -----*/
1450 if (bool &was_joined_before = joined(join_matrix, left, right); was_joined_before)
1451 goto next;
1452 else
1453 was_joined_before = true; // this will be joined now
1454
1455 /*----- This is a feasible join. Compute the successor state. -----*/
1456 {
1457 M_insist(std::is_sorted(joins, joins + state.num_joins() + 1));
1458 M_insist(not subproblems[left].empty());
1459 M_insist(not subproblems[right].empty());
1460
1461 /*----- Compute total cost. -----*/
1462 cnf::CNF condition; // TODO use join condition
1463 PT.update(G, CE, CF, subproblems[left], subproblems[right], condition);
1464
1465 /* Compute action cost. */
1466 const double total_cost = CF.calculate_join_cost(G, PT, CE, subproblems[left], subproblems[right], condition);
1467 const double action_cost = total_cost - (PT[subproblems[left]].cost + PT[subproblems[right]].cost);
1468
1469 EdgesBottomUp S(state.num_joins_to_goal(), state.g() + action_cost, joins,
1470 joins + state.num_joins() + 1);
1472 callback(std::move(S));
1473 }
1474
1475next:
1476 /*----- Advance to next join. -----*/
1477 ++joins[j];
1478 while (j < state.num_joins() and joins[j] == joins[j+1]) {
1479 ++j;
1480 ++joins[j];
1481 }
1482 if (joins[j] == G.num_joins())
1483 break;
1484 };
1485 }
1486
1487 template<typename Callback, typename PlanTable>
1488 void operator()(const EdgePtrBottomUp &state, Callback &&callback, PlanTable &PT,
1489 const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF,
1490 const CardinalityEstimator &CE) const
1491 {
1493
1494 /*----- Compute datasource to subproblem reverse index. -----*/
1495 Subproblem subproblems[G.num_sources()];
1496 uint8_t datasource_to_subproblem[G.num_sources()];
1497 state.compute_datasource_to_subproblem_index(G, subproblems, datasource_to_subproblem);
1498
1499 /*----- Initialize join matrix. -----*/
1500 const unsigned size_of_join_matrix = G.num_sources() * (G.num_sources() - 1) / 2;
1502 bool join_matrix[size_of_join_matrix];
1503 std::fill_n(join_matrix, size_of_join_matrix, false);
1504 auto joined = [&G, size_of_join_matrix](bool *matrix, unsigned row, unsigned col) -> bool& {
1505 const unsigned x = std::min(row, col);
1506 const unsigned y = std::max(row, col);
1507 M_insist(y < G.num_sources());
1508 M_insist(x < y);
1509 M_insist(y * (y - 1) / 2 + x < size_of_join_matrix);
1510 return matrix[ y * (y - 1) / 2 + x ];
1511 };
1512
1513 /*----- Collect all joins of `state`. -----*/
1514 unsigned joins_taken[state.num_joins()];
1515 {
1516 unsigned *ptr = joins_taken;
1517 for (auto it = state.cbegin(); it->parent(); ++it, ++ptr)
1518 *ptr = it->join_id();
1519 std::sort(joins_taken, joins_taken + state.num_joins());
1520 }
1521
1522 /*----- Enumerate all joins that can still be performed. -----*/
1523 unsigned *sweepline = joins_taken;
1524 for (std::size_t j = 0; j != G.num_joins(); ++j) {
1525 if (sweepline != joins_taken + state.num_joins() and j == *sweepline) {
1526 /*----- Join already present. -----*/
1527 ++sweepline;
1528 continue;
1529 }
1530
1531 const Join &join = *G.joins()[j];
1532 const auto &sources = join.sources();
1533
1534 /*----- Check whether the join is subsumed by joins in `state`. -----*/
1535 const unsigned left = datasource_to_subproblem[sources[0].get().id()];
1536 const unsigned right = datasource_to_subproblem[sources[1].get().id()];
1537 if (left == right) continue; // the data sources joined already belong to the same subproblem
1538
1539 /*----- Check whether the join is subsumed by a previously considered join. -----*/
1540 if (bool &was_joined_before = joined(join_matrix, left, right); was_joined_before)
1541 continue;
1542 else
1543 was_joined_before = true; // this will be joined now
1544
1545 /*----- This is a feasible join. Compute the successor state. -----*/
1546 M_insist(not subproblems[left].empty());
1547 M_insist(not subproblems[right].empty());
1548 M_insist(M.is_connected(subproblems[left], subproblems[right]));
1549
1550 /*----- Compute total cost. -----*/
1551 cnf::CNF condition; // TODO use join condition
1552 PT.update(G, CE, CF, subproblems[left], subproblems[right], condition);
1553
1554 /* Compute action cost. */
1555 const double total_cost = CF.calculate_join_cost(G, PT, CE, subproblems[left], subproblems[right], condition);
1556 const double action_cost = total_cost - (PT[subproblems[left]].cost + PT[subproblems[right]].cost);
1557
1558 EdgePtrBottomUp S(/* parent= */ &state,
1559 /* num_joins= */ state.num_joins() + 1,
1560 /* g= */ state.g() + action_cost,
1561 /* join_id= */ j);
1563 callback(std::move(S));
1564 }
1565 }
1566};
1567
1568
1570{
1571 template<typename State, typename PlanTable>
1572 static State Start(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF,
1573 const CardinalityEstimator &CE) {
1574 return State::Top(PT, G, M, CF, CE);
1575 }
1576
1577 template<typename State, typename PlanTable>
1578 static bool is_goal(const State &state, const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
1579 const CostFunction &CF, const CardinalityEstimator &CE)
1580 {
1581 return state.is_bottom(PT, G, M, CF, CE);
1582 }
1583
1584 template<typename State, typename PlanTable>
1585 void reset_marked(State &state, const PlanTable&, const QueryGraph &G, const AdjacencyMatrix&, const CostFunction&,
1586 const CardinalityEstimator&)
1587 {
1588 const Subproblem All = Subproblem::All(G.num_sources());
1589 state.mark(All);
1590 }
1591};
1592
1594{
1596 using direction::is_goal;
1597
1598 template<typename Callback, typename PlanTable>
1599 void operator()(const SubproblemsArray &state, Callback &&callback, PlanTable &PT,
1600 const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF,
1601 const CardinalityEstimator &CE) const
1602 {
1604 M_insist(not is_goal(state, PT, G, M, CF, CE), "cannot expand goal");
1605#ifndef NDEBUG
1606 bool has_successor = false;
1607#endif
1608
1609 const Subproblem All = Subproblem::All(G.num_sources());
1610 auto enumerate_ccp = [&](Subproblem S1, Subproblem S2) {
1611 using std::swap;
1612
1613 if (subproblem_lt(S2, S1)) swap(S1, S2);
1614 M_insist(subproblem_lt(S1, S2));
1615
1616 /*----- Merge subproblems of state, excluding S1|S2, and partitions S1 and S2. -----*/
1617 Subproblem *subproblems = state.get_allocator().allocate(state.size() + 1);
1618 {
1619 Subproblem partitions[2] = { S1, S2 };
1620 auto left_it = state.cbegin(), left_end = state.cend();
1621 auto right_it = partitions, right_end = partitions + 2;
1622 auto out = subproblems;
1623 for (;;) {
1624 M_insist(out <= subproblems + state.size() + 1, "out of bounds");
1625 M_insist(left_it <= left_end, "out of bounds");
1626 M_insist(right_it <= right_end, "out of bounds");
1627 if (left_it == left_end) {
1628 if (right_it == right_end) break;
1629 *out++ = *right_it++;
1630 } else if (*left_it == (S1|S2)) [[unlikely]] {
1631 ++left_it; // skip partitioned subproblem S1|S2
1632 } else if (right_it == right_end) {
1633 *out++ = *left_it++;
1634 } else if (subproblem_lt(*left_it, *right_it)) {
1635 *out++ = *left_it++;
1636 } else {
1637 *out++ = *right_it++;
1638 }
1639 }
1640 M_insist(out == subproblems + state.size() + 1);
1641 M_insist(left_it == left_end);
1642 M_insist(right_it == right_end);
1643 }
1644 M_insist(std::is_sorted(subproblems, subproblems + state.size() + 1, subproblem_lt));
1645
1646 cnf::CNF condition; // TODO use join condition
1647 /* The cost of the final join is always the size of the result set, and hence the same for all plans. We
1648 * therefore omit this cost, as otherwise goal states might be artificially postponed in the priority queue.
1649 * */
1650 double action_cost = 0;
1651 if ((S1|S2) != All) {
1652 if (not PT[S1|S2].model)
1653 PT[S1|S2].model = CE.estimate_join_all(G, PT, S1|S2, condition);
1654 action_cost = CE.predict_cardinality(*PT[S1|S2].model);
1655 }
1656
1657 /* Create new search state. */
1659 /* Context= */ PT, G, M, CF, CE,
1660 /* parent= */ &state,
1661 /* g= */ state.g() + action_cost,
1662 /* size= */ state.size() + 1,
1663 /* marked= */ S1|S2,
1664 /* subproblems= */ subproblems
1665 );
1667 callback(std::move(S));
1668#ifndef NDEBUG
1669 has_successor = true;
1670#endif
1671 };
1672
1673 for (const Subproblem S : state) {
1674 if (not S.is_singleton()) { // partition only the first non-singleton
1675 MinCutAGaT{}.partition(M, enumerate_ccp, S);
1676 break;
1677 }
1678 }
1679
1680#ifndef NDEBUG
1681 M_insist(has_successor, "must expand to at least one successor");
1682#endif
1683 }
1684
1685};
1686
1687}
1688
1689}
1690
1691
1692namespace m::pe::hs {
1693
1694/*======================================================================================================================
1695 * Heuristic Search Heuristics
1696 *====================================================================================================================*/
1697
1698namespace heuristics {
1699
1700using namespace m::pe::hs::search_states;
1701using namespace m::pe::hs::expansions;
1702
1703
1704template<typename PlanTable, typename State, typename Expand>
1705struct zero
1706{
1707 using state_type = State;
1708
1709 static constexpr bool is_admissible = true;
1710
1711 zero(PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&, const CardinalityEstimator&)
1712 { }
1713
1714 double operator()(const state_type&, PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&,
1715 const CardinalityEstimator&) const
1716 {
1717 return 0;
1718 }
1719};
1720
1721
1725template<typename PlanTable, typename State, typename Expand>
1726struct sum;
1727
1728template<typename PlanTable, typename State>
1729struct sum<PlanTable, State, BottomUp>
1730{
1731 using state_type = State;
1732
1735 static constexpr bool is_admissible = false;
1736
1737 sum(PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&, const CardinalityEstimator&) { }
1738
1739 double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
1740 const CostFunction &CF, const CardinalityEstimator &CE) const
1741 {
1742 double distance = 0;
1743 if (not state.is_top(PT, G, M, CF, CE)) {
1744 state.for_each_subproblem([&](const Subproblem S) {
1745 distance += CE.predict_cardinality(*PT[S].model);
1746 }, G);
1747 }
1748 return distance;
1749 }
1750};
1751
1752template<typename PlanTable, typename State>
1753struct sum<PlanTable, State, TopDown>
1754{
1755 using state_type = State;
1756
1757 static constexpr bool is_admissible = true;
1758
1759 sum(PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&, const CardinalityEstimator&) { }
1760
1761 double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
1762 const CostFunction &CF, const CardinalityEstimator &CE) const
1763 {
1764 static cnf::CNF condition; // TODO use join condition
1765 if (state.is_top(PT, G, M, CF, CE))
1766 return 0;
1767 double distance = 0;
1768 state.for_each_subproblem([&](const Subproblem S) {
1769 if (not S.is_singleton()) { // skip base relations
1770 if (not PT[S].model)
1771 PT[S].model = CE.estimate_join_all(G, PT, S, condition);
1772 distance += CE.predict_cardinality(*PT[S].model);
1773 }
1774 }, G);
1775 return distance;
1776 }
1777};
1778
1779template<typename PlanTable, typename State, typename Expand>
1780struct sum : sum<PlanTable, State, typename Expand::direction>
1781{
1782 using sum<PlanTable, State, typename Expand::direction>::sum; // forward base c'tor
1783};
1784
1785
1786template<typename PlanTable, typename State, typename Expand>
1787struct sqrt_sum;
1788
1789template<typename PlanTable, typename State>
1790struct sqrt_sum<PlanTable, State, TopDown>
1791{
1792 using state_type = State;
1793
1794 sqrt_sum(PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&, const CardinalityEstimator&)
1795 { }
1796
1797 double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix&,
1798 const CostFunction&, const CardinalityEstimator &CE) const
1799 {
1800 static cnf::CNF condition; // TODO use join condition
1801 double distance = 0;
1802 state.for_each_subproblem([&](const Subproblem S) {
1803 if (not S.is_singleton()) { // skip base relations
1804 if (not PT[S].model)
1805 PT[S].model = CE.estimate_join_all(G, PT, S, condition);
1806 distance += 2 * std::sqrt(CE.predict_cardinality(*PT[S].model));
1807 }
1808 }, G);
1809 return distance;
1810 }
1811};
1812
1813template<typename PlanTable, typename State, typename Expand>
1814struct sqrt_sum : sqrt_sum<PlanTable, State, typename Expand::direction>
1815{
1816 using sqrt_sum<PlanTable, State, typename Expand::direction>::sqrt_sum; // forward base c'tor
1817};
1818
1819
1820template<typename PlanTable, typename State, typename Expand>
1821struct scaled_sum;
1822
1823template<typename PlanTable, typename State>
1824struct scaled_sum<PlanTable, State, BottomUp>
1825{
1826 using state_type = State;
1827
1828 scaled_sum(PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&, const CardinalityEstimator&)
1829 { }
1830
1831 double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
1832 const CostFunction &CF, const CardinalityEstimator &CE) const
1833 {
1834 double distance = 0;
1835 double cardinalities[G.num_sources()];
1836 std::size_t num_sources = 0;
1837
1838 if (not state.is_top(PT, G, M, CF, CE)) {
1839 state.for_each_subproblem([&](Subproblem S) {
1840 cardinalities[num_sources++] = CE.predict_cardinality(*PT[S].model);
1841 }, G);
1842 std::sort(cardinalities, cardinalities + num_sources, std::greater<double>());
1843 for (std::size_t i = 0; i != num_sources - 1; ++i)
1844 distance += (i+1) * cardinalities[i];
1845 distance += (num_sources-1) * cardinalities[num_sources - 1];
1846 }
1847
1848 return distance;
1849 }
1850};
1851
1852template<typename PlanTable, typename State, typename Expand>
1853struct scaled_sum : scaled_sum<PlanTable, State, typename Expand::direction>
1854{
1855 using scaled_sum<PlanTable, State, typename Expand::direction>::scaled_sum; // forward base c'tor
1856};
1857
1858
1865template<typename PlanTable, typename State, typename Expand>
1866struct product;
1867
1868template<typename PlanTable, typename State>
1869struct product<PlanTable, State, BottomUp>
1870{
1871 using state_type = State;
1872
1873 product(PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&, const CardinalityEstimator&) { }
1874
1875 double operator()(const state_type &state, PlanTable &PT, const QueryGraph&, const AdjacencyMatrix&,
1876 const CostFunction&, const CardinalityEstimator &CE) const
1877 {
1878 if (state.size() > 1) {
1879 double distance = 1;
1880 for (auto s : state)
1881 distance *= CE.predict_cardinality(*PT[s].model);
1882 return distance;
1883 }
1884 return 0;
1885 }
1886};
1887
1888template<typename PlanTable, typename State, typename Expand>
1889struct product : product<PlanTable, State, typename Expand::direction>
1890{
1891 using product<PlanTable, State, typename Expand::direction>::product;
1892};
1893
1894
1895template<typename PlanTable, typename State>
1897{
1898 using state_type = State;
1899
1901 const CardinalityEstimator&)
1902 { }
1903
1904 double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
1905 const CostFunction &CF, const CardinalityEstimator &CE) const
1906 {
1907 if (state.is_top(PT, G, M, CF, CE)) return 0;
1908
1909 double distance = 0;
1910 for (auto s : state)
1911 distance += CE.predict_cardinality(*PT[s].model);
1912
1913 /* If there are only 2 subproblems left, the result of joining the two is the goal; no lookahead needed. */
1914 if (state.size() == 2)
1915 return distance;
1916
1917 /* Look for least additional costs. */
1918 double min_additional_costs = std::numeric_limits<decltype(min_additional_costs)>::infinity();
1919 Subproblem min_subproblem_left;
1920 Subproblem min_subproblem_right;
1921 for (auto outer_it = state.cbegin(); outer_it != state.cend(); ++outer_it) {
1922 const auto neighbors = M.neighbors(*outer_it);
1923 for (auto inner_it = std::next(outer_it); inner_it != state.cend(); ++inner_it) {
1924 M_insist(uint64_t(*inner_it) > uint64_t(*outer_it), "subproblems must be sorted");
1925 M_insist((*outer_it & *inner_it).empty(), "subproblems must not overlap");
1926 if (neighbors & *inner_it) { // inner and outer are joinable.
1927 const Subproblem joined = *outer_it | *inner_it;
1928 if (not PT[joined].model) {
1929 PT[joined].model = CE.estimate_join(G, *PT[*outer_it].model, *PT[*inner_it].model,
1930 /* TODO */ cnf::CNF{});
1931 }
1932 cnf::CNF condition; // TODO use join condition
1933 const double total_cost = CF.calculate_join_cost(G, PT, CE, *outer_it, *inner_it, condition);
1934 const double action_cost = total_cost - (PT[*outer_it].cost + PT[*inner_it].cost);
1936 const double additional_costs = action_cost + CE.predict_cardinality(*PT[joined].model);
1937 if (additional_costs < min_additional_costs) {
1938 min_subproblem_left = *outer_it;
1939 min_subproblem_right = *inner_it;
1940 min_additional_costs = additional_costs;
1941 }
1942 }
1943 }
1944 }
1945 distance += min_additional_costs;
1946 distance -= CE.predict_cardinality(*PT[min_subproblem_left].model);
1947 distance -= CE.predict_cardinality(*PT[min_subproblem_right].model);
1948 return distance;
1949 }
1950};
1951
1952
1954template<typename PlanTable, typename State, typename Expand>
1955struct GOO;
1956
1957template<typename PlanTable, typename State>
1958struct GOO<PlanTable, State, BottomUp>
1959{
1960 using state_type = State;
1961
1962 GOO(PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&, const CardinalityEstimator&) { }
1963
1964 double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
1965 const CostFunction &CF, const CardinalityEstimator &CE) const
1966 {
1967 using std::swap;
1968
1969 /*----- Initialize nodes. -----*/
1970 m::pe::GOO::node nodes[G.num_sources()];
1971 std::size_t num_nodes = 0;
1972 state.for_each_subproblem([&](Subproblem S) {
1973 new (&nodes[num_nodes++]) m::pe::GOO::node(S, M.neighbors(S));
1974 }, G);
1975
1976 /*----- Greedily enumerate all joins. -----*/
1977 const Subproblem All = Subproblem::All(G.num_sources());
1978 double cost = 0;
1979 m::pe::GOO{}.for_each_join([&](Subproblem left, Subproblem right) {
1980 static cnf::CNF condition; // TODO: use join condition
1981 if (All != (left|right)) {
1982 const double old_cost_left = std::exchange(PT[left].cost, 0);
1983 const double old_cost_right = std::exchange(PT[right].cost, 0);
1984 cost += CF.calculate_join_cost(G, PT, CE, left, right, condition);
1985 PT[left].cost = old_cost_left;
1986 PT[right].cost = old_cost_right;
1987 }
1988 }, PT, G, M, CF, CE, nodes, nodes + num_nodes);
1989
1990 return cost;
1991 }
1992};
1993
1995template<typename PlanTable, typename State>
1996struct GOO<PlanTable, State, TopDown>
1997{
1998 using state_type = State;
1999
2000 GOO(PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&, const CardinalityEstimator&) { }
2001
2002 double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
2003 const CostFunction &CF, const CardinalityEstimator &CE) const
2004 {
2005 /*----- Initialize worklist from subproblems. -----*/
2006 std::vector<Subproblem> worklist;
2007 worklist.reserve(G.num_sources());
2008 worklist.insert(worklist.end(), state.cbegin(), state.cend());
2009
2010 /*----- Greedily enumerate all joins. -----*/
2011 double cost = 0;
2012 m::pe::TDGOO{}.for_each_join([&](Subproblem left, Subproblem right) {
2013 cost += CE.predict_cardinality(*PT[left].model) + CE.predict_cardinality(*PT[right].model);
2014 }, PT, G, M, CF, CE, std::move(worklist));
2015
2016 return cost;
2017 }
2018};
2019
2020template<typename PlanTable, typename State, typename Expand>
2021struct GOO : GOO<PlanTable, State, typename Expand::direction>
2022{
2023 using GOO<PlanTable, State, typename Expand::direction>::GOO;
2024};
2025
2026
2027template<typename PlanTable, typename State, typename Expand>
2028struct avg_sel;
2029
2030template<typename PlanTable, typename State>
2031struct avg_sel<PlanTable, State, BottomUp>
2032{
2033 using state_type = State;
2034
2035 avg_sel(PlanTable&, const QueryGraph&, const AdjacencyMatrix&, const CostFunction&, const CardinalityEstimator&) { }
2036
2037 double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix&,
2038 const CostFunction&, const CardinalityEstimator &CE) const
2039 {
2040 using std::swap;
2041 if (state.size() <= 2) return 0;
2042
2043 double cardinalities[state.size()];
2044 double *end;
2045 {
2046 double *ptr = cardinalities;
2047 for (const Subproblem S : state)
2048 *ptr++ = CE.predict_cardinality(*PT[S].model);
2049 end = ptr;
2050 std::sort(cardinalities, end); // sort cardinalities in ascending order
2051 }
2052
2053 const Subproblem All = Subproblem::All(G.num_sources());
2054 if (not PT[All].model) {
2055 static cnf::CNF condition;
2056 PT[All].model = CE.estimate_join_all(G, PT, All, condition);
2057 }
2058
2059 double Cprod = std::reduce(cardinalities, end, 1., std::multiplies<double>{});
2060 const double sel_remaining = CE.predict_cardinality(*PT[All].model) / Cprod;
2061 M_insist(sel_remaining <= 1.1);
2062
2063 const std::size_t num_joins_remaining = state.size() - 1;
2064 const double avg_sel = std::pow(sel_remaining, 1. / num_joins_remaining);
2065
2066 /*----- Keep joining the two smallest subproblems and accumulate the cost. -----*/
2067 double accumulated_cost = 0;
2068 for (auto ptr = cardinalities; ptr < end - 1; ++ptr) {
2069 const double card = ptr[0] * ptr[1] * avg_sel;
2070 accumulated_cost += card;
2071 *++ptr = card;
2072 for (auto runner = ptr; runner != end - 1 and runner[0] > runner[1]; ++runner)
2073 swap(runner[0], runner[1]);
2074 }
2075
2076 return accumulated_cost;
2077 }
2078};
2079
2080template<typename PlanTable, typename State, typename Expand>
2081struct avg_sel : avg_sel<PlanTable, State, typename Expand::direction>
2082{
2083 using avg_sel<PlanTable, State, typename Expand::direction>::avg_sel;
2084};
2085
2086}
2087
2088}
2089
2090
2091namespace m::pe::hs {
2092
2093namespace config {
2094
2096{
2097 template<typename Cmp>
2098 using compare = boost::heap::compare<Cmp>;
2099
2100 template<typename T, typename... Options>
2101 using heap_type = boost::heap::fibonacci_heap<T, Options...>;
2102
2103 template<typename T>
2104 using allocator_type = boost::container::node_allocator<T>;
2105};
2106
2107template<bool B>
2109{
2110 static constexpr bool PerformWeightedSearch= B;
2111};
2112
2113template<long Num, long Denom = 1>
2114struct beam
2115{
2116 using BeamWidth = std::ratio<Num, Denom>;
2117};
2118
2119template<bool B>
2120struct lazy
2121{
2122 static constexpr bool Lazy = B;
2123};
2124
2125template<bool B>
2127{
2128 static constexpr bool IsMonotone = B;
2129};
2130
2131template<bool B>
2133{
2134 static constexpr bool PerformCostBasedPruning = B;
2135};
2136
2137template<bool B>
2139{
2140 static constexpr bool PerformAnytimeSearch = B;
2141};
2142
2144template<typename T, typename... Ts>
2145struct combine : T, combine<Ts...> { };
2146
2149template<typename T>
2150struct combine<T> : T { };
2151
2152
2153/*===== Pre-configured Search Strategies =============================================================================*/
2154
2155#define DEFINE_SEARCH(NAME, ...) \
2156 using NAME = combine<monotone<true>, Fibonacci_heap, __VA_ARGS__>
2157
2170
2171#undef DEFINE_SEARCH
2172
2173}
2174
2175}
2176
2177
2178namespace m::pe::hs {
2179
2180using binary_plan_type = std::vector<std::pair<Subproblem, Subproblem>>;
2181
2182template<typename PlanTable, typename State>
2183double goo_path_completion(const State &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
2184 const CardinalityEstimator &CE, const CostFunction &CF, binary_plan_type &plan);
2185
2186template<
2187 typename PlanTable,
2188 typename State,
2189 typename Expand,
2190 typename Search,
2191 template<typename, typename, typename> typename Heuristic,
2192 ai::SearchConfigConcept StaticConfig
2193>
2194bool heuristic_search(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF,
2195 const CardinalityEstimator &CE, Search &S, const ai::SearchConfiguration<StaticConfig> &config);
2196
2198struct HeuristicSearch final : PlanEnumeratorCRTP<HeuristicSearch>
2199{
2201 using base_type::operator();
2202
2203 template<typename PlanTable>
2204 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const;
2205};
2206
2207}
#define DEFINE_SEARCH(NAME,...)
#define id(X)
Check whether.
Definition: concepts.hpp:39
#define M_unreachable(MSG)
Definition: macro.hpp:146
#define M_notnull(ARG)
Definition: macro.hpp:182
#define M_insist(...)
Definition: macro.hpp:129
‍heuristic search state expansions namespace
‍heuristic search states namespace
‍heuristic search namespace
bool heuristic_search(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE, Search &S, const ai::SearchConfiguration< StaticConfig > &config)
double goo_path_completion(const State &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CardinalityEstimator &CE, const CostFunction &CF, binary_plan_type &plan)
std::vector< std::pair< Subproblem, Subproblem > > binary_plan_type
‍mutable namespace
Definition: Backend.hpp:10
void swap(PlanTableBase< Actual > &first, PlanTableBase< Actual > &second)
Definition: PlanTable.hpp:394
T(x)
and
Definition: enum_ops.hpp:12
STL namespace.
An adjacency matrix for a given query graph.
bool is_connected(SmallBitset S) const
Returns true iff the subproblem S is connected.
SmallBitset neighbors(SmallBitset S) const
Computes the neighbors of S.
std::unique_ptr< DataModel > estimate_join_all(const QueryGraph &G, const PlanTable &PT, Subproblem to_join, const cnf::CNF &condition) const
Compute a DataModel for the result of joining all DataSources in to_join by condition.
virtual std::unique_ptr< DataModel > estimate_join(const QueryGraph &G, const DataModel &left, const DataModel &right, const cnf::CNF &condition) const =0
Form a new DataModel by joining two DataModels.
virtual std::size_t predict_cardinality(const DataModel &data) const =0
double calculate_join_cost(const QueryGraph &G, const PlanTable &PT, const CardinalityEstimator &CE, Subproblem left, Subproblem right, const cnf::CNF &condition) const
Returns the total cost of performing a Join operation.
A Join in a QueryGraph combines DataSources by a join condition.
Definition: QueryGraph.hpp:138
const sources_t & sources() const
Returns a reference to the joined DataSources.
Definition: QueryGraph.hpp:153
void partition(const AdjacencyMatrix &M, Callback &&callback, const Subproblem S) const
Definition: MinCutAGaT.hpp:73
Singleton class representing options provided as command line argument to the binaries.
Definition: Options.hpp:12
The query graph represents all data sources and joins in a graph structure.
Definition: QueryGraph.hpp:172
std::size_t num_joins() const
Returns the number of Joins in this graph.
Definition: QueryGraph.hpp:227
const auto & joins() const
Definition: QueryGraph.hpp:258
std::size_t num_sources() const
Returns the number of DataSources in this graph.
Definition: QueryGraph.hpp:224
Implements a small and efficient set over integers in the range of 0 to 63 (including).
Definition: ADT.hpp:26
bool is_singleton() const
Definition: ADT.hpp:165
static SmallBitset All(std::size_t n)
Factory method for creating a SmallBitset with first n bits set.
Definition: ADT.hpp:117
bool empty() const
Returns true if there are no elements in this SmallBitset.
Definition: ADT.hpp:163
static SmallBitset Singleton(std::size_t n)
Factory method for creating a Singleton Smallbitset with n -th bit set.
Definition: ADT.hpp:125
auto begin() const
Definition: ADT.hpp:173
Relies on the rules of aggregate initialization
A CNF represents a conjunction of cnf::Clauses.
Definition: CNF.hpp:134
A helper class to define CRTP class hierarchies.
Definition: crtp.hpp:50
actual_type & actual()
Definition: crtp.hpp:52
Greedy operator ordering.
void for_each_join(Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &, const CardinalityEstimator &CE, node *begin, node *end) const
Enumerate the sequence of joins that yield the smallest subproblem in each step.
Top-down version of greedy operator ordering.
void for_each_join(Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &, const CardinalityEstimator &CE, std::vector< Subproblem > subproblems) const
Enumerate the sequence of graph cuts that yield the smallest subproblems in each step.
Computes the join order using heuristic search.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
boost::container::node_allocator< T > allocator_type
boost::heap::fibonacci_heap< T, Options... > heap_type
Combines multiple configuration parameters into a single configuration type.
void operator()(const EdgesBottomUp &state, Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &, const CostFunction &CF, const CardinalityEstimator &CE) const
void operator()(const SubproblemsArray &state, Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
static bool is_goal(const State &state, const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE)
void operator()(const EdgePtrBottomUp &state, Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
void operator()(const SubproblemTableBottomUp &state, Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
static bool is_goal(const State &state, const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE)
void reset_marked(State &state, const PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
static State Start(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE)
static bool is_goal(const State &state, const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE)
void operator()(const SubproblemsArray &state, Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
static bool is_goal(const State &state, const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE)
static State Start(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE)
void reset_marked(State &state, const PlanTable &, const QueryGraph &G, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
GOO(PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
GOO(PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
Inspired by GOO: Greedy Operator Ordering.
avg_sel(PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &CE) const
bottomup_lookahead_cheapest(PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
product(PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
double operator()(const state_type &state, PlanTable &PT, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &CE) const
This heuristic estimates the distance from a state to the nearest goal state as the product of the si...
scaled_sum(PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &CE) const
sqrt_sum(PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
sum(PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
sum(PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
double operator()(const state_type &state, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
This heuristic estimates the distance from a state to the nearest goal state as the sum of the sizes ...
zero(PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
double operator()(const state_type &, PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &) const
void for_each_subproblem(Callback &&callback, const QueryGraph &G) const
void for_each_successor(Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
Returns true iff this and other have the exact same Subproblems.
bool is_top(const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
double decrease_g(const Actual *new_parent, double new_g) const
Reduces the g value of the state.
static state_counters_t STATE_COUNTERS(state_counters_t new_counters)
double g() const
Returns the cost to reach this state from the initial state.
bool is_bottom(const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
M_LCOV_EXCL_START void dump(std::ostream &out) const
void make_owner(const EdgePtrBottomUp *state, const QueryGraph &G)
friend void swap(Scratchpad &first, Scratchpad &second)
std::conditional_t< is_const, const EdgePtrBottomUp &, EdgePtrBottomUp & > reference_type
std::conditional_t< is_const, const EdgePtrBottomUp *, EdgePtrBottomUp * > pointer_type
void compute_datasource_to_subproblem_index(const QueryGraph &G, Subproblem *subproblems, uint8_t *datasource_to_subproblem) const
Computes the Subproblems produced by the Joins of this state in subproblems.
bool operator!=(const EdgePtrBottomUp &other) const
friend void swap(EdgePtrBottomUp &first, EdgePtrBottomUp &second)
bool operator==(const EdgePtrBottomUp &other) const
Returns true iff this and other have the exact same joins.
void for_each_subproblem(Callback &&callback, const QueryGraph &G) const
bool is_goal(const PlanTable &, const QueryGraph &G, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &) const
unsigned num_joins_remaining(const QueryGraph &G) const
EdgePtrBottomUp(const QueryGraph &)
Creates an initial state.
static EdgePtrBottomUp CreateInitial(const QueryGraph &G, const AdjacencyMatrix &)
EdgePtrBottomUp(const EdgePtrBottomUp &)=delete
Copy c'tor.
EdgePtrBottomUp & operator=(EdgePtrBottomUp other)
Assignment.
M_LCOV_EXCL_START friend std::ostream & operator<<(std::ostream &out, const EdgePtrBottomUp &S)
EdgePtrBottomUp(const EdgePtrBottomUp *parent, unsigned num_joins, double g, unsigned join_id)
Creates a state with actual costs g and given subproblems.
EdgesBottomUp(size_type num_joins_to_goal, size_type num_joins, unsigned *joins)
Creates an initial state with the given subproblems.
boost::container::node_allocator< unsigned > allocator_type
EdgesBottomUp(size_type num_joins_to_goal, double g, size_type num_joins, unsigned *joins)
Creates a state with actual costs g and given subproblems.
bool operator==(const EdgesBottomUp &other) const
Returns true iff this and other have the exact same joins.
EdgesBottomUp(const EdgesBottomUp &other)
Copy c'tor.
size_type num_joins_
‍number of joins performed to reach this state
static allocator_type & get_allocator()
‍returns a reference to the class-wide allocator
double g_
‍the cost to reach this state from the initial state
bool is_goal(const PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &) const
M_LCOV_EXCL_START friend std::ostream & operator<<(std::ostream &out, const EdgesBottomUp &S)
void for_each_subproblem(Callback &&callback, const QueryGraph &G) const
size_type num_joins_to_goal_
‍number of joins necessary to reach goal
size_type num_subproblems(const QueryGraph &G) const
void compute_datasource_to_subproblem_index(const QueryGraph &G, Subproblem *subproblems, uint8_t *datasource_to_subproblem) const
Computes the Subproblems produced by the Joins of this state in subproblems.
unsigned * joins_
‍array of IDs of joins performed
static EdgesBottomUp CreateInitial(const QueryGraph &G, const AdjacencyMatrix &)
bool operator!=(const EdgesBottomUp &other) const
EdgesBottomUp & operator=(EdgesBottomUp other)
Assignment.
EdgesBottomUp(size_type num_joins_to_goal, double g, It begin, It end)
Creates a state with actual costs g and subproblems in range [begin; end).
friend void swap(EdgesBottomUp &first, EdgesBottomUp &second)
static allocator_type allocator_
‍class-wide allocator, used by all instances
the_iterator(Subproblem *table, size_type size, size_type idx, size_type unique)
std::conditional_t< is_const, const Subproblem &, Subproblem & > reference
double g_
‍the cost to reach this state from the initial state
bool operator!=(const SubproblemTableBottomUp &other) const
size_type size_
‍number of subproblems in this state
boost::container::node_allocator< Subproblem > allocator_type
SubproblemTableBottomUp(const SubproblemTableBottomUp &)=delete
Copy c'tor.
SubproblemTableBottomUp(const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE)
Creates an initial state.
static allocator_type & get_allocator()
‍returns a reference to the class-wide allocator
static allocator_type allocator_
‍class-wide allocator, used by all instances
bool is_goal(const PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &) const
friend void swap(SubproblemTableBottomUp &first, SubproblemTableBottomUp &second)
SubproblemTableBottomUp(const PlanTable &, const QueryGraph &G, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &, double g, size_type size, Subproblem marker, Subproblem *table)
Creates a state with cost g and given table.
SubproblemTableBottomUp(const SubproblemTableBottomUp &other, const PlanTable &, const QueryGraph &G, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
static unsigned num_partitions(const PlanTable &, const QueryGraph &G, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
bool operator==(const SubproblemTableBottomUp &other) const
Returns true iff this and other have the exact same Subproblems.
unsigned partition_id(const PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &) const
M_LCOV_EXCL_START friend std::ostream & operator<<(std::ostream &out, const SubproblemTableBottomUp &S)
bool operator<(const SubproblemTableBottomUp &) const
SubproblemTableBottomUp(SubproblemTableBottomUp &&other)
Move c'tor.
SubproblemTableBottomUp & operator=(SubproblemTableBottomUp other)
Assignment.
void for_each_subproblem(Callback &&callback, const QueryGraph &) const
M_LCOV_EXCL_START friend std::ostream & operator<<(std::ostream &out, const SubproblemsArray &S)
friend void swap(SubproblemsArray &first, SubproblemsArray &second)
double g_
‍the cost to reach this state from the initial state
static SubproblemsArray Bottom(const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE)
static allocator_type & get_allocator()
‍returns a reference to the class-wide allocator
SubproblemsArray & operator=(SubproblemsArray other)
Assignment.
SubproblemsArray(const PlanTable &, const QueryGraph &G, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &, const SubproblemsArray *parent, double g, size_type size, Subproblem marked, Subproblem *subproblems)
Creates a state with cost g and given subproblems.
void for_each_subproblem(Callback &&callback, const QueryGraph &) const
SubproblemsArray(const SubproblemsArray &)=delete
Copy c'tor.
static SubproblemsArray Top(const PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE)
static allocator_type allocator_
‍class-wide allocator, used by all instances
unsigned partition_id(const PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &) const
bool is_bottom(const PlanTable &, const QueryGraph &G, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &) const
static unsigned num_partitions(const PlanTable &, const QueryGraph &G, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
void decrease_g(const SubproblemsArray *new_parent, double new_g) const
size_type size_
‍number of subproblems in this state
bool is_top(const PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &) const
bool operator==(const SubproblemsArray &other) const
Returns true iff this and other have the exact same Subproblems.
Subproblem marked_
‍marked subproblem, used to avoid redundant paths
bool operator!=(const SubproblemsArray &other) const
SubproblemsArray(const SubproblemsArray &other, const PlanTable &, const QueryGraph &, const AdjacencyMatrix &, const CostFunction &, const CardinalityEstimator &)
boost::container::node_allocator< Subproblem > allocator_type
SubproblemsArray(SubproblemsArray &&other)
Move c'tor.
bool operator<(const SubproblemsArray &other) const
uint64_t operator()(const m::pe::hs::search_states::EdgePtrBottomUp &state) const
uint64_t operator()(const m::pe::hs::search_states::EdgesBottomUp &state) const
uint64_t operator()(const m::pe::hs::search_states::SubproblemTableBottomUp &state) const
uint64_t operator()(const m::pe::hs::search_states::SubproblemsArray &state) const