27 operator unsigned()
const {
return weight_; }
30 for (
auto &clause : cnf)
35 for (
auto pred : clause)
43 if (
auto cs = cast<const CharacterSequence>(d.type()))
49 if (
auto cs = cast<const CharacterSequence>(e.
type()))
65 constexpr unsigned MAX_WEIGHT = 12;
69 std::vector<unsigned> clause_weights;
70 clause_weights.reserve(filter.size());
71 for (
auto &clause : filter) {
74 clause_weights.emplace_back(W);
78 std::vector<std::size_t> order(filter.size(), 0);
79 std::iota(order.begin(), order.end(), 0);
80 std::sort(order.begin(), order.end(), [&clause_weights](std::size_t first, std::size_t second) ->
bool {
81 return clause_weights[first] < clause_weights[second];
85 std::vector<cnf::CNF> optimized_filters;
86 unsigned current_weight = 0;
88 for (std::size_t i = 0, end = filter.size(); i != end; ++i) {
89 const std::size_t idx = order[i];
92 const unsigned clause_weight = clause_weights[idx];
94 if (not current_filter.empty()
and current_weight + clause_weight > MAX_WEIGHT) {
95 optimized_filters.emplace_back(std::move(current_filter));
99 current_filter.emplace_back(std::move(clause));
100 current_weight += clause_weight;
102 if (not current_filter.empty())
103 optimized_filters.emplace_back(std::move(current_filter));
105 M_insist(not optimized_filters.empty());
106 return optimized_filters;
109std::vector<Optimizer::projection_type>
111 const std::vector<order_type> &order_by)
113 std::vector<Optimizer::projection_type> required_projections;
118 if (
auto t = std::get_if<const Expr*>(&d.target())) {
120 for (
auto &[expr, _] : projections) {
121 if (*t == &expr.get())
126 for (
auto &[expr, alias] : projections) {
127 if (not alias.has_value()
and d == expr.get())
134 for (
auto &[expr, alias] : projections) {
135 if (not alias.has_value()
and fn == expr.get())
141 [](
auto&&) ->
void { },
144 for (
auto [expr, _] : order_by)
147 return required_projections;
164 auto [plan, PT] = optimize_with_plantable<PlanTableSmallOrDense>(G);
165 return { std::move(plan), std::move(PT.get_final()) };
167 auto [plan, PT] = optimize_with_plantable<PlanTableLargeAndSparse>(G);
168 return { std::move(plan), std::move(PT.get_final()) };
173 auto [plan, PT] = optimize_with_plantable<PlanTableSmallOrDense>(G);
174 return { std::move(plan), std::move(PT.get_final()) };
178 auto [plan, PT] = optimize_with_plantable<PlanTableLargeAndSparse>(G);
179 return { std::move(plan), std::move(PT.get_final()) };
184template<
typename PlanTable>
188 const auto num_sources = G.
sources().size();
190 auto &CE = C.get_database_in_use().cardinality_estimator();
192 if (num_sources == 0) {
193 PT.get_final().cost = 0;
194 PT.get_final().model = CE.empty_model();
195 return { std::make_unique<ProjectionOperator>(G.
projections()), std::move(PT) };
204 auto &entry = PT.get_final();
209 return { std::move(plan), std::move(PT) };
212template<
typename PlanTable>
215 const auto num_sources = G.
sources().size();
218 auto source_plans = std::make_unique<Producer*[]>(num_sources);
221 if (
auto bt = cast<const BaseTable>(ds.get())) {
224 PT[s].model = CE.estimate_scan(G, s);
225 auto &store = bt->table().store();
226 auto source = std::make_unique<ScanOperator>(store, bt->name().assert_not_none());
229 auto source_info = std::make_unique<OperatorInformation>();
230 source_info->subproblem = s;
231 source_info->estimated_cardinality = CE.predict_cardinality(*PT[s].model);
232 source->info(std::move(source_info));
234 source_plans[ds->id()] = source.release();
237 auto &Q = as<const Query>(*ds);
239 auto [sub_plan, sub] =
optimize(Q.query_graph());
243 if (Q.alias().has_value()) {
244 M_insist(is<ProjectionOperator>(sub_plan),
"only projection may rename attributes");
246 for (
auto &e : sub_plan->schema())
247 S.
add({ Q.alias(), e.id.name }, e.type, e.constraints);
248 sub_plan->schema() = S;
253 PT[s].cost = sub.cost;
254 sub.model->assign_to(s);
255 PT[s].model = std::move(sub.model);
256 source_plans[ds->id()] = sub_plan.release();
260 if (ds->filter().size()) {
263 Producer *filtered_ds = source_plans[ds->id()];
266 for (
auto &&filter : filters) {
268 auto new_model = CE.estimate_filter(G, *PT[s].model, filter);
269 PT[s].model = std::move(new_model);
271 if (filter.size() == 1
and filter[0].size() > 1) {
272 auto tmp = std::make_unique<DisjunctiveFilterOperator>(std::move(filter));
273 tmp->add_child(filtered_ds);
274 filtered_ds = tmp.release();
276 auto tmp = std::make_unique<FilterOperator>(std::move(filter));
277 tmp->add_child(filtered_ds);
278 filtered_ds = tmp.release();
282 auto source_info = std::make_unique<OperatorInformation>();
283 source_info->subproblem = s;
284 source_info->estimated_cardinality = CE.predict_cardinality(*PT[s].model);
285 filtered_ds->
info(std::move(source_info));
288 source_plans[ds->id()] = filtered_ds;
294template<
typename PlanTable>
302 std::size_t num_CSGs = 0, num_CCPs = 0;
304 auto inc_CSGs = [&num_CSGs](
Subproblem) { ++num_CSGs; };
308 std::cout << num_CSGs <<
" CSGs, " << num_CCPs <<
" CCPs" << std::endl;
315 std::cout <<
"Est. total cost: " << PT.get_final().cost
316 <<
"\nEst. result set size: " << CE.predict_cardinality(*PT.get_final().model)
317 <<
"\nPlan cost: " << PT[PT.get_final().left].cost + PT[PT.get_final().right].cost
322template<
typename PlanTable>
324 const std::unique_ptr<
Producer*[]> &source_plans)
const
328 std::vector<std::reference_wrapper<Join>> joins;
329 for (
auto &J : G.
joins()) joins.emplace_back(*J);
334 auto subproblems = PT[s].get_subproblems();
335 if (subproblems.empty()) {
337 return source_plans[*s.
begin()];
340 std::vector<Producer*> sub_plans;
341 for (
auto sub : subproblems)
342 sub_plans.push_back(construct_plan_rec(sub, construct_plan_rec));
346 for (
auto it = joins.begin(); it != joins.end(); ) {
349 for (
auto ds : it->get().sources())
350 join_sources(ds.get().id()) =
true;
353 join_condition = join_condition
and it->get().condition();
354 it = joins.erase(it);
361 auto join = std::make_unique<JoinOperator>(join_condition);
362 for (
auto sub_plan : sub_plans)
363 join->add_child(sub_plan);
364 auto join_info = std::make_unique<OperatorInformation>();
365 join_info->subproblem = s;
366 join_info->estimated_cardinality = CE.predict_cardinality(*PT[s].model);
367 join->info(std::move(join_info));
368 return join.release();
371 return construct_plan_impl(s, construct_plan_impl);
385 auto new_model = CE.estimate_grouping(G, *entry.
model, G.
group_by());
386 entry.
model = std::move(new_model);
389 group_by->add_child(plan.release());
392 auto info = std::make_unique<OperatorInformation>();
394 info->estimated_cardinality = CE.predict_cardinality(*entry.
model);
396 group_by->info(std::move(info));
397 plan = std::move(group_by);
400 auto new_model = CE.estimate_grouping(G, *entry.
model, std::vector<GroupingOperator::group_type>());
401 entry.
model = std::move(new_model);
402 auto agg = std::make_unique<AggregationOperator>(G.
aggregates());
403 agg->add_child(plan.release());
406 auto info = std::make_unique<OperatorInformation>();
408 info->estimated_cardinality = CE.predict_cardinality(*entry.
model);
410 agg->info(std::move(info));
411 plan = std::move(agg);
415 const bool requires_post_projection = not additional_projections.empty();
418 if (not additional_projections.empty() or not G.
projections().empty()) {
420 additional_projections.insert(additional_projections.end(), G.
projections().begin(), G.
projections().end());
421 auto projection = std::make_unique<ProjectionOperator>(std::move(additional_projections));
422 projection->add_child(plan.release());
425 auto info = std::make_unique<OperatorInformation>();
427 info->estimated_cardinality = projection->child(0)->info().estimated_cardinality;
429 projection->info(std::move(info));
430 plan = std::move(projection);
436 auto order_by = std::make_unique<SortingOperator>(G.
order_by());
437 order_by->add_child(plan.release());
440 auto info = std::make_unique<OperatorInformation>();
442 info->estimated_cardinality = order_by->child(0)->info().estimated_cardinality;
444 order_by->info(std::move(info));
445 plan = std::move(order_by);
451 auto new_model = CE.estimate_limit(G, *entry.
model, G.
limit().limit, G.
limit().offset);
452 entry.
model = std::move(new_model);
454 auto limit = std::make_unique<LimitOperator>(G.
limit().limit, G.
limit().offset);
455 limit->add_child(plan.release());
458 auto info = std::make_unique<OperatorInformation>();
460 info->estimated_cardinality = CE.predict_cardinality(*entry.
model);
462 limit->info(std::move(info));
463 plan = std::move(limit);
471 std::vector<projection_type> adapted_projections;
473 if (alias.has_value()) {
474 Token name(expr.get().tok.pos, alias.assert_not_none(), TK_IDENTIFIER);
476 std::move(name), expr.get().type(), &expr.get());
483 auto projection = std::make_unique<ProjectionOperator>(std::move(adapted_projections));
484 projection->add_child(plan.release());
487 auto info = std::make_unique<OperatorInformation>();
489 info->estimated_cardinality = projection->child(0)->info().estimated_cardinality;
491 projection->info(std::move(info));
492 plan = std::move(projection);
497#define DEFINE(PLANTABLE) \
499std::pair<std::unique_ptr<Producer>, PLANTABLE> \
500Optimizer::optimize_with_plantable(QueryGraph&) const; \
502std::unique_ptr<Producer*[]> \
503Optimizer::optimize_source_plans(const QueryGraph&, PLANTABLE&) const; \
506Optimizer::optimize_join_order(const QueryGraph&, PLANTABLE&) const; \
508std::unique_ptr<Producer> \
509Optimizer::construct_join_order(const QueryGraph&, const PLANTABLE&, const std::unique_ptr<Producer*[]>&) const
#define M_TIME_EXPR(EXPR, DESCR, TIMER)
#define M_unreachable(MSG)
auto visit(Callable &&callable, Base &obj, m::tag< Callable > &&=m::tag< Callable >())
Generic implementation to visit a class hierarchy, with similar syntax as std::visit.
void operator()(const cnf::Clause &clause)
void operator()(const ast::Expr &e)
void operator()(const cnf::CNF &cnf)
void for_each_CSG_pair_undirected(SmallBitset super, std::function< void(SmallBitset, SmallBitset)> callback) const
Enumerate all pairs of connected subgraphs (CSGs) that are connected by at least one edge.
void for_each_CSG_undirected(SmallBitset super, SmallBitset source, std::function< void(SmallBitset)> callback) const
Enumerate all connected subgraphs (CSGs) of the graph induced by vertex super set super,...
The catalog contains all Databases and keeps track of all meta information of the database system.
Database & get_database_in_use()
Returns a reference to the Database that is currently in use, if any.
static Catalog & Get()
Return a reference to the single Catalog instance.
Timer & timer()
Returns the global Timer instance.
std::unique_ptr< CardinalityEstimator > cardinality_estimator(std::unique_ptr< CardinalityEstimator > CE)
Sets the CardinalityEstimator of this Database.
OperatorInformation & info()
std::pair< std::unique_ptr< Producer >, PlanTableEntry > optimize(QueryGraph &G) const
Computes and constructs an optimal logical plan for the given query graph G.
std::pair< std::unique_ptr< Producer >, PlanTable > optimize_with_plantable(QueryGraph &G) const
Recursively computes and constructs an optimal logical plan for the given query graph G,...
void optimize_join_order(const QueryGraph &G, PlanTable &PT) const
Optimizes the join order using the plan table PT which already contains entries for all data sources ...
QueryGraph::Subproblem Subproblem
auto & plan_enumerator() const
std::unique_ptr< Producer > construct_join_order(const QueryGraph &G, const PlanTable &PT, const std::unique_ptr< Producer *[]> &source_plans) const
Constructs a join operator tree given a solved plan table PT and the plans to compute the data source...
auto & cost_function() const
std::unique_ptr< Producer > optimize_plan(const QueryGraph &G, std::unique_ptr< Producer > plan, PlanTableEntry &entry) const
Optimizes and constructs an operator tree given a join operator tree plan and the final plan table en...
bool needs_projection_
flag to determine whether current query needs a projection as root
std::vector< std::unique_ptr< const ast::Expr > > created_exprs_
additionally created expressions
std::unique_ptr< Producer *[]> optimize_source_plans(const QueryGraph &G, PlanTable &PT) const
Initializes the plan table PT with the data source entries contained in G.
static std::vector< cnf::CNF > optimize_filter(cnf::CNF filter)
Optimizes the filter filter by splitting it into smaller filters and ordering them.
static std::vector< projection_type > compute_projections_required_for_order_by(const std::vector< projection_type > &projections, const std::vector< order_type > &order_by)
Computes and returns a std::vector of additional projections required before evaluating the ORDER BY ...
static Options & Get()
Return a reference to the single Options instance.
std::unique_ptr< DataModel > model
the model of this subplan's result
This table represents all explored plans with their sub-plans, estimated size, cost,...
This table represents all explored plans with their sub-plans, estimated size, cost,...
A data type representing a pooled (or internalized) object.
A Producer is an Operator that can be evaluated to a sequence of tuples.
The query graph represents all data sources and joins in a graph structure.
const auto & group_by() const
const std::vector< projection_type > & projections() const
const auto & joins() const
std::size_t num_sources() const
Returns the number of DataSources in this graph.
const auto & aggregates() const
const auto & order_by() const
const auto & sources() const
AdjacencyMatrix & adjacency_matrix()
A Schema represents a sequence of identifiers, optionally with a prefix, and their associated types.
void add(entry_type e)
Adds the entry e to this Schema.
Implements a small and efficient set over integers in the range of 0 to 63 (including).
static SmallBitset All(std::size_t n)
Factory method for creating a SmallBitset with first n bits set.
std::size_t size() const
Returns the number of elements in this SmallBitset.
static SmallBitset Singleton(std::size_t n)
Factory method for creating a Singleton Smallbitset with n -th bit set.
bool is_subset(SmallBitset other) const
Returns true if the set represented by this is a subset of other, i.e. this ⊆ other.
A constant: a string literal or a numeric constant.
const Type * type() const
Returns the Type of this Expr.
A query expression for nested queries.
static Token CreateArtificial(TokenType type=TK_EOF)
A unary expression: "+e", "-e", "~e", "NOT e".
A CNF represents a conjunction of cnf::Clauses.
A cnf::Clause represents a disjunction of Predicates.
Exception class which can be thrown to skip recursion of the subtree in pre-order visitors.