mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
PlanEnumerator.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <cstdint>
7#include <mutable/mutable-config.hpp>
11#include <unordered_map>
12
13
15namespace m {
16
18namespace pe {
19
21 returns<void>::
22 crtp_args<PlanTableSmallOrDense&, PlanTableLargeAndSparse&>::
23 args<const QueryGraph&, const CostFunction&> { };
24
26struct M_EXPORT PlanEnumerator : enumerate_tag::base_type
27{
29 using enumerate_tag::base_type::operator();
30
31 virtual ~PlanEnumerator() { }
32
34 template<typename PlanTable>
35 void operator()(const QueryGraph &G, const CostFunction &CF, PlanTable &PT) const {
36 operator()(enumerate_tag{}, PT, G, CF);
37 }
38};
39
40template<typename Actual>
42 , enumerate_tag::derived_type<Actual>
43{
44 using PlanEnumerator::operator();
45};
46
48struct M_EXPORT DPccp final : PlanEnumeratorCRTP<DPccp>
49{
51 using base_type::operator();
52
53 template<typename PlanTable>
54 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const;
55};
56
58struct M_EXPORT GOO : PlanEnumeratorCRTP<GOO>
59{
61 using base_type::operator();
62
63 struct node
64 {
67
68 node() = default;
69 node(Subproblem subproblem, Subproblem neighbors)
70 : subproblem(subproblem), neighbors(neighbors)
71 { }
72
74 bool can_merge_with(const node &other) const {
75 M_insist(bool(this->subproblem & other.neighbors) == bool(other.subproblem & this->neighbors));
76 return bool(this->subproblem & other.neighbors);
77 }
78
80 node merge(const node &other) const {
81 M_insist(can_merge_with(other));
82 const Subproblem S = this->subproblem | other.subproblem;
83 return node(S, (this->neighbors | other.neighbors) - S);
84 }
85
87 node operator+(const node &other) const { return merge(other); }
89 node & operator+=(const node &other) { *this = *this + other; return *this; }
90
92 bool operator&(const node &other) const { return can_merge_with(other); }
93 };
94
96 template<typename Callback, typename PlanTable>
97 void for_each_join(Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
98 const CostFunction&, const CardinalityEstimator &CE, node *begin, node *end) const
99 {
100 cnf::CNF condition; // TODO use join condition
101 while (begin + 1 != end) {
102 using std::swap;
103
104 /*----- Find two most promising subproblems to join. -----*/
105 node *left = nullptr, *right = nullptr;
106 double least_cardinality = std::numeric_limits<double>::infinity();
107 for (node *outer = begin; outer != end; ++outer) {
108 for (node *inner = std::next(outer); inner != end; ++inner) {
109 if (*outer & *inner) { // can be merged
110 M_insist((outer->subproblem & inner->subproblem).empty());
111 M_insist(M.is_connected(outer->subproblem, inner->subproblem));
112 const Subproblem joined = outer->subproblem | inner->subproblem;
113 if (not PT[joined].model)
114 PT[joined].model = CE.estimate_join(G, *PT[outer->subproblem].model,
115 *PT[inner->subproblem].model, condition);
116 const double C_joined = CE.predict_cardinality(*PT[joined].model);
117 if (C_joined < least_cardinality) {
118 least_cardinality = C_joined;
119 left = outer;
120 right = inner;
121 }
122 }
123 }
124 }
125
126 /*----- Issue callback. -----*/
127 M_insist((left->subproblem & right->subproblem).empty());
128 M_insist(M.is_connected((left->subproblem & right->subproblem)));
129 callback(left->subproblem, right->subproblem);
130
131 /*----- Join the two most promising subproblems found. -----*/
132 M_insist(left);
133 M_insist(right);
134 M_insist(left < right);
135 *left += *right; // merge `right` into `left`
136 swap(*right, *--end); // erase old `right`
137 }
138 }
139 template<typename PlanTable>
140 void compute_plan(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
141 const CostFunction &CF, const CardinalityEstimator &CE, node *begin, node *end) const
142 {
144 for_each_join([&](const Subproblem left, const Subproblem right){
145 static cnf::CNF condition;
146 PT.update(G, CE, CF, left, right, condition);
147 }, PT, G, M, CF, CE, begin, end);
148 }
149
150 template<typename PlanTable>
151 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const;
152};
153
155struct M_EXPORT TDGOO : PlanEnumeratorCRTP<TDGOO>
156{
158 using base_type::operator();
159
162 template<typename Callback, typename PlanTable>
163 void for_each_join(Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M,
164 const CostFunction&, const CardinalityEstimator &CE, std::vector<Subproblem> subproblems) const
165 {
166 std::vector<Subproblem> worklist(std::move(subproblems));
167 worklist.reserve(G.num_sources());
168 std::vector<std::pair<Subproblem, Subproblem>> joins;
169 joins.reserve(G.num_sources() - 1);
170
171 while (not worklist.empty()) {
172 const Subproblem top = worklist.back();
173 worklist.pop_back();
174 if (top.is_singleton())
175 continue; // nothing to be done for singletons
176
177 double C_min = std::numeric_limits<decltype(C_min)>::infinity();
178 Subproblem min_left, min_right;
179 auto enumerate_ccp = [&](Subproblem left, Subproblem right) -> void {
180 if (not PT[left].model)
181 PT[left].model = CE.estimate_join_all(G, PT, left, cnf::CNF{}); // TODO: use actual condition
182 if (not PT[right].model)
183 PT[right].model = CE.estimate_join_all(G, PT, right, cnf::CNF{}); // TODO: use actual condition
184 const double C = CE.predict_cardinality(*PT[left].model) + CE.predict_cardinality(*PT[right].model);
185 if (C < C_min) {
186 C_min = C;
187 min_left = left;
188 min_right = right;
189 }
190 };
191 MinCutAGaT{}.partition(M, enumerate_ccp, top);
192
193 /*----- Save joins on a stack to process them later in reverse order (bottom up). -----*/
194 M_insist(not min_left.empty(), "no partition found");
195 M_insist(not min_right.empty(), "no partition found");
196 joins.emplace_back(min_left, min_right); // save join to stack
197
198 worklist.emplace_back(min_left);
199 worklist.emplace_back(min_right);
200 }
201
202 /*----- Issue callback for joins in bottom-up fashion. -----*/
203 for (auto it = joins.crbegin(); it != joins.crend(); ++it)
204 callback(it->first, it->second);
205 }
206
207 template<typename PlanTable>
208 void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const;
209};
210
211}
212
213}
struct @5 args
#define M_insist(...)
Definition: macro.hpp:129
‍mutable namespace
Definition: Backend.hpp:10
void swap(PlanTableBase< Actual > &first, PlanTableBase< Actual > &second)
Definition: PlanTable.hpp:394
An adjacency matrix for a given query graph.
bool is_connected(SmallBitset S) const
Returns true iff the subproblem S is connected.
std::unique_ptr< DataModel > estimate_join_all(const QueryGraph &G, const PlanTable &PT, Subproblem to_join, const cnf::CNF &condition) const
Compute a DataModel for the result of joining all DataSources in to_join by condition.
virtual std::unique_ptr< DataModel > estimate_join(const QueryGraph &G, const DataModel &left, const DataModel &right, const cnf::CNF &condition) const =0
Form a new DataModel by joining two DataModels.
virtual std::size_t predict_cardinality(const DataModel &data) const =0
void partition(const AdjacencyMatrix &M, Callback &&callback, const Subproblem S) const
Definition: MinCutAGaT.hpp:73
The query graph represents all data sources and joins in a graph structure.
Definition: QueryGraph.hpp:172
std::size_t num_sources() const
Returns the number of DataSources in this graph.
Definition: QueryGraph.hpp:224
Implements a small and efficient set over integers in the range of 0 to 63 (including).
Definition: ADT.hpp:26
bool is_singleton() const
Definition: ADT.hpp:165
bool empty() const
Returns true if there are no elements in this SmallBitset.
Definition: ADT.hpp:163
A helper class to introduce a virtual method overload per type to a class hierarchy.
Definition: crtp.hpp:93
A CNF represents a conjunction of cnf::Clauses.
Definition: CNF.hpp:134
Computes the join order using connected subgraph complement pairs (CCP).
node operator+(const node &other) const
Merges this and other.
Subproblem neighbors
node merge(const node &other) const
Merge two nodes.
Subproblem subproblem
node(Subproblem subproblem, Subproblem neighbors)
bool can_merge_with(const node &other) const
Checks whether two nodes can be merged.
bool operator&(const node &other) const
Checks whether this node node can be merged with other.
node & operator+=(const node &other)
Merges other into this node.
Greedy operator ordering.
void for_each_join(Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &, const CardinalityEstimator &CE, node *begin, node *end) const
Enumerate the sequence of joins that yield the smallest subproblem in each step.
void compute_plan(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE, node *begin, node *end) const
An interface for all plan enumerators.
void operator()(const QueryGraph &G, const CostFunction &CF, PlanTable &PT) const
Enumerate subplans and fill plan table.
Top-down version of greedy operator ordering.
void for_each_join(Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &, const CardinalityEstimator &CE, std::vector< Subproblem > subproblems) const
Enumerate the sequence of graph cuts that yield the smallest subproblems in each step.