36 using base_type::operator();
38 template<
typename PlanTable>
41 const std::size_t n = sources.size();
44 for (std::size_t i = 1, end = 1UL << n; i < end; ++i) {
46 if (S.
size() == 1)
continue;
48 uint64_t offset = S.
capacity() - __builtin_clzl(uint64_t(S));
49 M_insist(offset != 0,
"invalid subproblem offset");
53 M_insist(PT.has_plan(S1),
"must have found the optimal plan for S1");
54 M_insist(PT.has_plan(S2),
"must have found the optimal plan for S2");
57 PT.update(G, CE, CF, S1, S2, condition);
72 using base_type::operator();
74 template<
typename PlanTable>
77 std::size_t n = sources.size();
82 for (std::size_t s = 2; s <= n; ++s) {
83 for (std::size_t s1 = 1; s1 < s; ++s1) {
84 std::size_t s2 = s - s1;
88 if (not PT.has_plan(*S1))
continue;
90 if (not PT.has_plan(*S2))
continue;
91 if (*S1 & *S2)
continue;
94 PT.update(G, CE, CF, *S1, *S2, condition);
113 using base_type::operator();
115 template<
typename PlanTable>
118 std::size_t n = sources.size();
123 for (std::size_t s = 2; s <= n; ++s) {
124 std::size_t
m = s / 2;
125 for (std::size_t s1 = 1; s1 <=
m; ++s1) {
126 std::size_t s2 = s - s1;
131 if (not PT.has_plan(*S1))
continue;
133 for (++S2; S2; ++S2) {
134 if (not PT.has_plan(*S2))
continue;
135 if (*S1 & *S2)
continue;
139 PT.update(G, CE, CF, *S1, *S2, condition);
144 if (not PT.has_plan(*S1))
continue;
146 if (not PT.has_plan(*S2))
continue;
147 if (*S1 & *S2)
continue;
151 PT.update(G, CE, CF, *S1, *S2, condition);
169 using base_type::operator();
171 template<
typename PlanTable>
174 std::size_t n = sources.size();
179 for (std::size_t s = 2; s <= n; ++s) {
185 if (not PT.has_plan(O))
continue;
186 if (not PT.has_plan(Comp))
continue;
188 PT.update(G, CE, CF, O, Comp, condition);
204 using base_type::operator();
206 template<
typename PlanTable>
209 const std::size_t n = sources.size();
213 for (std::size_t i = 1, end = 1UL << n; i < end; ++i) {
215 if (S.
size() == 1)
continue;
220 if (not PT.has_plan(S1))
continue;
221 if (not PT.has_plan(S2))
continue;
223 PT.update(G, CE, CF, S1, S2, condition);
239 using base_type::operator();
241 template<
typename PlanTable>
244 const std::size_t n = sources.size();
248 for (std::size_t i = 1, end = 1UL << n; i < end; ++i) {
250 if (S.
size() == 1)
continue;
253 uint64_t offset = S.
capacity() - __builtin_clzl(uint64_t(S));
254 M_insist(offset != 0,
"invalid subproblem offset");
259 if (not PT.has_plan(S1))
continue;
260 if (not PT.has_plan(S2))
continue;
263 PT.update(G, CE, CF, S1, S2, condition);
274template<
typename PlanTable>
283 PT.update(G, CE, CF, left, right, condition);
301 using base_type::operator();
303 template<
typename PlanTable>
304 std::vector<std::size_t>
309 auto selectivity = [&](std::size_t u, std::size_t v) {
312 auto &model_u = *PT[
U].model;
313 auto &model_v = *PT[V].model;
314 auto &joined = PT[
U|V];
315 if (not joined.model) {
317 joined.model = CE.
estimate_join(G, model_u, model_v, condition);
327 std::vector<std::size_t> linearization;
329 std::vector<std::size_t> best_linearization;
331 double least_cost = std::numeric_limits<
decltype(least_cost)>::infinity();
337 auto rank = [&](std::size_t parent_id, std::size_t child_id) ->
double {
343 const auto &model_parent = *PT[parent].model;
344 const auto &model_child = *PT[child].model;
345 if (not PT[parent|child].model) {
347 PT[parent|child].model = CE.
estimate_join(G, model_parent, model_child, condition);
352 auto g = [](
double cardinality) ->
double {
return 1 * cardinality; };
353 return (cardinality_joined - cardinality_parent) / g(cardinality_child);
356 struct ranked_relation
363 ranked_relation(std::size_t
id,
double rank) : id_(
id), rank_(rank) { }
365 std::size_t
id()
const {
return id_; }
366 double rank()
const {
return rank_; }
368 bool operator<(
const ranked_relation &other)
const {
return this->rank() < other.rank(); }
369 bool operator>(
const ranked_relation &other)
const {
return this->rank() > other.rank(); }
373 for (std::size_t root_id = 0; root_id != G.
num_sources(); ++root_id) {
376 linearization.clear();
377 linearization.emplace_back(root_id);
380 std::priority_queue<ranked_relation, std::vector<ranked_relation>, std::greater<ranked_relation>> Q;
381 for (std::size_t n : MST.
neighbors(root))
382 Q.emplace(n, rank(root_id, n));
385 while (not Q.empty()) {
386 ranked_relation ranked_relation = Q.top();
392 linearization.emplace_back(ranked_relation.id());
396 PT.update(G, CE, CF, joined, R, condition);
401 for (std::size_t n : N)
402 Q.emplace(n, rank(ranked_relation.id(), n));
406 const double cost = PT[joined].cost;
410 M_insist(linearization[0] == root_id);
411 for (std::size_t i = 1; i != linearization.size(); ++i) {
412 const std::size_t R = linearization[i];
416 M_insist(
bool(PT[runner].model),
"must have computed a model during linearization");
417 M_insist(
bool(PT[runner].left),
"must have a left subplan");
418 M_insist(
bool(PT[runner].right),
"must have a right subplan");
422 if (cost < least_cost) {
424 swap(best_linearization, linearization);
429 return best_linearization;
432 template<
typename PlanTable>
438 const std::vector<std::size_t> linearization = linearize(PT, G, M, CF, CE);
443 for (std::size_t i = 1; i < G.
num_sources(); ++i) {
446 PT.update(G, CE, CF, left, right, condition);
447 right = left | right;
460 using base_type::operator();
474 Sequence(
const std::vector<std::size_t> &linearization, std::size_t begin, std::size_t end)
475 : linearization_(linearization), begin_(begin), end_(end)
477 S_ = compute_subproblem_from_sequence(begin_, end_);
480 std::size_t
length()
const {
return end_ - begin_; }
486 std::size_t
end()
const {
return end_; }
488 std::size_t
first()
const {
return linearization_[first_index()]; }
489 std::size_t
last()
const {
return linearization_[last_index()]; }
492 bool is_at_back()
const {
return end_ == linearization_.size(); }
496 M_insist(end <= linearization_.size());
499 S[linearization_[begin++]] =
true;
510 M_insist(end_ < linearization_.size());
536 void dump(std::ostream &out)
const { out << *
this << std::endl; }
540 template<
typename PlanTable>
546 const std::vector<std::size_t> linearization =
IKKBZ{}.
linearize(PT, G, M, CF, CE);
550 bool moves_right =
true;
558 const bool is_left_connected = PT.has_plan(left);
559 const bool is_right_connected = PT.has_plan(right);
560 if (is_left_connected
and is_right_connected) {
562 PT.update(G, CE, CF, left, right, condition);
567 if (seq.
length() == num_relations)
600 using base_type::operator();
602 template<
typename PlanTable>
606 if (not PT.has_plan(S)) {
616 PlanGen(G, M, CF, CE, PT, sub);
617 PlanGen(G, M, CF, CE, PT, complement);
621 PT.update(G, CE, CF, sub, complement, condition);
627 template<
typename PlanTable>
630 std::size_t n = sources.size();
646 using base_type::operator();
648 template<
typename PlanTable>
655 auto handle_ccp = std::bind(recurse, std::placeholders::_1, std::placeholders::_2, recurse);
657 if (not PT.has_plan(first))
660 if (not PT.has_plan(second))
666 PT.update(G, CE, CF, first, second, condition);
668 recurse(first, second, recurse);
683template<
typename PlanTable>
691 for (std::size_t i = 0; i != G.
num_sources(); ++i) {
694 nodes[i] =
node(S, N);
706template<
typename PlanTable>
713 std::vector<Subproblem> worklist;
715 worklist.emplace_back(All);
719 PT.update(G, CE, CF, left, right,
cnf::CNF{});
721 for_each_join(update_PT, PT, G, M, CF, CE, std::move(worklist));
726 X(DPccp, "enumerates connected subgraph complement pairs") \
727 X(DPsize, "size-based subproblem enumeration") \
728 X(DPsizeOpt, "optimized DPsize: does not enumerate symmetric subproblems") \
729 X(DPsizeSub, "DPsize with enumeration of subset complement pairs") \
730 X(DPsub, "subset-based subproblem enumeration") \
731 X(DPsubOpt, "optimized DPsub: does not enumerate symmetric subproblems") \
732 X(GOO, "Greedy Operator Ordering") \
733 X(TDGOO, "Top-down variant of Greedy Operator Ordering") \
734 X(IKKBZ, "greedy algorithm by IK/KBZ, ordering joins by rank") \
735 X(LinearizedDP, "DP with search space linearization based on IK/KBZ") \
736 X(TDbasic, "basic top-down join enumeration using generate-and-test partitioning") \
737 X(TDMinCutAGaT, "top-down join enumeration using minimal graph cuts and advanced generate-and-test partitioning") \
738 X(PEall, "enumerates ALL join orders, inclding Cartesian products")
740#define INSTANTIATE(NAME, _) \
741 template void NAME::operator()(enumerate_tag, PlanTableSmallOrDense &PT, const QueryGraph &G, const CostFunction &CF) const; \
742 template void NAME::operator()(enumerate_tag, PlanTableLargeAndSparse &PT, const QueryGraph &G, const CostFunction &CF) const;
747static
void register_plan_enumerators()
750#define REGISTER(NAME, DESCRIPTION) \
751 C.register_plan_enumerator(C.pool(#NAME), std::make_unique<NAME>(), DESCRIPTION);
__attribute__((constructor(202))) static void register_interpreter()
#define INSTANTIATE(CLASS)
plan enumerator namespace
void swap(PlanTableBase< Actual > &first, PlanTableBase< Actual > &second)
SmallBitset least_subset(SmallBitset S)
Returns the least subset of a given set, i.e. the set represented by the lowest 1 bit.
and arithmetic< U > and same_signedness< T, U > U
SmallBitset next_subset(SmallBitset subset, SmallBitset set)
Returns the next subset of a given subset and `set.
Computes the join order using size-based dynamic programming.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Computes the join order using size-based dynamic programming.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Computes the join order using size-based dynamic programming.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Computes the join order using subset-based dynamic programming.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Computes the join order using subset-based dynamic programming.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Implements join ordering using the IK/KBZ algorithm.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
std::vector< std::size_t > linearize(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE) const
Sequence(const std::vector< std::size_t > &linearization, std::size_t begin, std::size_t end)
std::size_t first() const
std::size_t first_index() const
Returns the index in linearization of the first element of this sequence.
Subproblem compute_subproblem_from_sequence(std::size_t begin, std::size_t end)
std::size_t end_
the index in the linearization one past the last element of this sequence
Subproblem subproblem() const
std::size_t last_index() const
Returns the index in linearization of the last element of this sequence.
friend std::ostream & operator<<(std::ostream &out, const Sequence &seq)
void dump(std::ostream &out) const
Subproblem S_
the subproblem formed by this sequence
std::size_t length() const
std::size_t begin_
the index in the linearization of the first element of this sequence
const std::vector< std::size_t > & linearization_
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
Computes the join order by enumerating all join orders, including Cartesian products.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
void PlanGen(const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE, PlanTable &PT, Subproblem S) const
An adjacency matrix for a given query graph.
void for_each_CSG_pair_undirected(SmallBitset super, std::function< void(SmallBitset, SmallBitset)> callback) const
Enumerate all pairs of connected subgraphs (CSGs) that are connected by at least one edge.
bool is_connected(SmallBitset S) const
Returns true iff the subproblem S is connected.
AdjacencyMatrix minimum_spanning_forest(std::function< double(std::size_t, std::size_t)> weight) const
Computes the minimum spanning forest for this graph.
SmallBitset neighbors(SmallBitset S) const
Computes the neighbors of S.
virtual std::unique_ptr< DataModel > estimate_join(const QueryGraph &G, const DataModel &left, const DataModel &right, const cnf::CNF &condition) const =0
Form a new DataModel by joining two DataModels.
virtual std::size_t predict_cardinality(const DataModel &data) const =0
The catalog contains all Databases and keeps track of all meta information of the database system.
Database & get_database_in_use()
Returns a reference to the Database that is currently in use, if any.
static Catalog & Get()
Return a reference to the single Catalog instance.
std::unique_ptr< CardinalityEstimator > cardinality_estimator(std::unique_ptr< CardinalityEstimator > CE)
Sets the CardinalityEstimator of this Database.
Enumerate all subsets of size k based on superset of size n.
static GospersHack enumerate_from(SmallBitset set, uint64_t n)
Create an instance of GospersHack that enumerates all remaining subsets of a set of n elements,...
static GospersHack enumerate_all(uint64_t k, uint64_t n)
Create an instance of GospersHack that enumerates all subsets of size k of a set of n elements.
void partition(const AdjacencyMatrix &M, Callback &&callback, const Subproblem S) const
The query graph represents all data sources and joins in a graph structure.
std::size_t num_sources() const
Returns the number of DataSources in this graph.
const auto & sources() const
AdjacencyMatrix & adjacency_matrix()
Implements a small and efficient set over integers in the range of 0 to 63 (including).
bool is_singleton() const
constexpr std::size_t capacity()
Returns the maximum capacity.
static SmallBitset All(std::size_t n)
Factory method for creating a SmallBitset with first n bits set.
std::size_t size() const
Returns the number of elements in this SmallBitset.
static SmallBitset Singleton(std::size_t n)
Factory method for creating a Singleton Smallbitset with n -th bit set.
A CNF represents a conjunction of cnf::Clauses.
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
void compute_plan(PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &CF, const CardinalityEstimator &CE, node *begin, node *end) const
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
QueryGraph::Subproblem Subproblem
void operator()(enumerate_tag, PlanTable &PT, const QueryGraph &G, const CostFunction &CF) const
void for_each_join(Callback &&callback, PlanTable &PT, const QueryGraph &G, const AdjacencyMatrix &M, const CostFunction &, const CardinalityEstimator &CE, std::vector< Subproblem > subproblems) const
Enumerate the sequence of graph cuts that yield the smallest subproblems in each step.