1#include "IR/QueryGraph.hpp"
347std::vector<std::reference_wrapper<const ast::FnApplicationExpr>>
350 std::vector<std::reference_wrapper<const ast::FnApplicationExpr>> aggregates;
355 if (e.get_function().is_aggregate()) {
356 using std::find_if, std::to_string;
358 auto exists = [&str](std::reference_wrapper<const ast::Expr> agg) {
361 if (find_if(aggregates.begin(), aggregates.end(), exists) == aggregates.end())
362 aggregates.push_back(e);
371 for (
auto &s : as<ast::SelectClause>(*stmt.
select).select)
375 for (
auto &o : as<ast::OrderByClause>(*stmt.
order_by).order_by)
384 const std::vector<std::reference_wrapper<const ast::Expr>> components)
388 return d.contains_free_variables() or d.is_identifier();
392 for (
auto &arg : e.args) {
402 [](
auto&) ->
bool {
return true; },
405 for (
auto c : components)
406 if (expr == c.get())
return true;
423 [](
auto&) ->
void { },
426 if (D.has_table_name())
430 auto B = std::make_unique<GraphBuilder>();
436 for (
const auto &pred : clause)
446 auto &ref =
graph_->add_source(subquery.alias, subquery.builder->get());
448 for (
auto &[deferred_clause, deferred_CI] : subquery.builder->deferred_clauses_) {
449 M_insist(deferred_CI.binding_depth > 0,
"was this clause bound in its subquery?");
450 --deferred_CI.binding_depth;
453 if (deferred_CI.binding_depth == 0)
454 bound_clauses_.try_emplace(std::move(deferred_clause), std::move(deferred_CI));
456 deferred_clauses_.try_emplace(std::move(deferred_clause), std::move(deferred_CI));
471 auto pred = clause[0];
474 auto binary = cast<const ast::BinaryExpr>(&pred.expr());
478 if (binary->op().type != m::TK_EQUAL)
482 bool lhs_has_free_variables = binary->lhs->contains_free_variables();
483 bool rhs_has_free_variables = binary->rhs->contains_free_variables();
484 if (lhs_has_free_variables
and binary->lhs->contains_bound_variables())
486 if (rhs_has_free_variables
and binary->rhs->contains_bound_variables())
488 M_insist(lhs_has_free_variables != rhs_has_free_variables,
"only either side may contain free variables");
493 auto &expr_with_bound_vars = lhs_has_free_variables ? *binary->rhs : *binary->lhs;
517 for (
auto pred : clause) {
520 throw m::invalid_argument(
"the bound parts of a correlated clause cannot be composed of the "
560 const std::pair<const ast::Designator*, PooledOptionalString> &pair)
562 for (
auto p : pairs) {
563 if (
auto d = cast<const ast::Designator>(p.first); d
and equal(*d, *pair.first))
566 pairs.emplace_back(pair);
571 const std::pair<const ast::Expr*, PooledOptionalString> &pair)
573 if (
auto d = cast<const ast::Designator>(pair.first))
574 emplace_back_unique(pairs, std::pair<const ast::Designator*, PooledOptionalString>(d, pair.second));
576 pairs.emplace_back(pair);
582 for (
auto e : exprs) {
583 if (
auto d = cast<const ast::Designator>(e); d
and equal(*d, *des))
586 exprs.emplace_back(des);
592 if (
auto d = cast<const ast::Designator>(expr))
595 exprs.emplace_back(expr);
601 for (
auto s : sources) {
602 if (s->name() == src->
name())
605 sources.emplace_back(src);
610void insert(std::vector<const ast::Expr*> &exprs,
const std::vector<const ast::Designator*> &insertions) {
611 for (
auto d : insertions)
617void insert(std::vector<const ast::Expr*> &exprs,
const std::vector<const ast::Expr*> &insertions) {
618 for (
auto e : insertions) {
619 if (
auto d = cast<const ast::Designator>(e))
622 exprs.emplace_back(e);
635 std::vector<const ast::QueryExpr*>
get() {
return std::move(queries_); }
637 using ConstASTExprVisitor::operator();
646 void operator()(Const<ast::UnaryExpr> &e)
override { (*this)(*e.expr); }
648 void operator()(Const<ast::BinaryExpr> &e)
override { (*this)(*e.lhs); (*this)(*e.rhs); }
650 void operator()(Const<ast::QueryExpr> &e)
override { queries_.push_back(&e); }
655struct m::GetPrimaryKey
658 std::vector<const ast::Expr*> primary_keys_;
663 auto get() {
return std::move(primary_keys_); }
666 if (
auto base = cast<BaseTable>(source)) {
670 ast::Token tbl(pos, base->name(), TK_IDENTIFIER);
671 for (
auto pkey : base->table().primary_key()) {
672 ast::Token attr(pos, pkey->name, TK_IDENTIFIER);
674 primary_keys_.push_back(D);
677 }
else if (
auto query = cast<Query>(source)) {
678 auto &graph = query->query_graph();
679 auto former_size = primary_keys_.size();
680 if (graph.grouping()) {
682 for (
auto e : graph.group_by()) {
686 ast::Token tbl(pos, query->name(), TK_IDENTIFIER);
687 std::ostringstream oss;
690 auto D =
new ast::Designator(dot, tbl, attr, as<const PrimitiveType>(e.get().type())->as_scalar(), e);
691 primary_keys_.push_back(D);
695 for (
auto ds : graph.sources())
699 if (not graph.projections().empty()) {
700 while (former_size != primary_keys_.size()) {
702 std::pair<const ast::Expr *, const char *>(primary_keys_.at(former_size),
nullptr));
722struct GetDesignators : ast::ConstASTExprVisitor
725 std::vector<const ast::Expr*> designators_;
726 bool aggregates_ =
true;
731 std::vector<const ast::Expr*>
get() {
return std::move(designators_); }
733 void aggregates(
bool aggregates) { aggregates_ = aggregates; }
735 using ConstASTExprVisitor::operator();
736 void operator()(Const<ast::ErrorExpr>&)
override {
M_unreachable(
"graph must not contain errors"); }
738 void operator()(Const<ast::Designator> &e)
override { designators_.push_back(&e); }
740 void operator()(Const<ast::Constant>&)
override { }
742 void operator()(Const<ast::FnApplicationExpr> &e)
override {
744 designators_.push_back(&e);
746 for (
auto &arg : e.args)
751 void operator()(Const<ast::UnaryExpr> &e)
override { (*this)(*e.expr); }
753 void operator()(Const<ast::BinaryExpr> &e)
override { (*this)(*e.lhs); (*this)(*e.rhs); }
755 void operator()(Const<ast::QueryExpr>&)
override { }
767 auto it = std::find(graph->
sources().begin(), graph->
sources().end(), &query);
768 for (
auto &c : (*it)->filter()) {
772 for (
auto &j : (*it)->joins()) {
773 for (
auto &c : j->condition()) {
781 GD.aggregates(
false);
794struct ReplaceDesignators : ast::ConstASTExprVisitor
797 const std::vector<const ast::Expr*> *origin_;
799 std::vector<const ast::Expr*> deletions_;
802 ReplaceDesignators() { }
808 std::vector<const ast::Expr*>
replace(
cnf::CNF &cnf,
const std::vector<const ast::Expr*> *origin,
809 bool target_deleted =
false)
814 for (
auto &c : cnf) {
819 if (cast<const ast::Designator>(e)
and new_designator_) {
825 deletions_.push_back(e);
827 deletions_.push_back(new_designator_);
828 new_designator_ =
nullptr;
835 cnf = std::move(cnf_new);
836 return std::move(deletions_);
843 std::vector<const ast::Expr*>
replace(std::vector<const ast::Expr*> &target,
const std::vector<const ast::Expr*> *origin,
844 bool target_deleted =
false) {
847 std::vector<const ast::Expr*> target_new;
848 for (
auto e : target) {
850 if (cast<const ast::Designator>(e)
and new_designator_) {
854 target_new.emplace_back(new_designator_);
856 deletions_.push_back(e);
858 deletions_.push_back(new_designator_);
859 new_designator_ =
nullptr;
861 target_new.emplace_back(e);
864 target = std::move(target_new);
865 return std::move(deletions_);
872 std::vector<const ast::Expr*>
replace(std::vector<std::pair<const ast::Expr*, const char*>> &target,
873 const std::vector<const ast::Expr*> *origin,
bool target_deleted =
false)
877 std::vector<std::pair<const ast::Expr*, const char*>> target_new;
878 for (
auto e : target) {
880 if (cast<const ast::Designator>(e.first)
and new_designator_) {
884 target_new.emplace_back(new_designator_, e.second);
886 deletions_.push_back(e.first);
888 deletions_.push_back(new_designator_);
889 new_designator_ =
nullptr;
891 target_new.emplace_back(e);
894 target = std::move(target_new);
895 return std::move(deletions_);
899 using ConstASTExprVisitor::operator();
900 void operator()(Const<ast::ErrorExpr>&)
override {
M_unreachable(
"graph must not contain errors"); }
902 void operator()(Const<ast::Designator> &e)
override {
903 if (
auto d = findDesignator(e))
904 new_designator_ = make_designator(d);
907 void operator()(Const<ast::Constant>&)
override { }
909 void operator()(Const<ast::FnApplicationExpr> &e)
override {
910 std::vector<std::unique_ptr<ast::Expr>> args_new;
911 for (
auto &arg : e.args) {
913 if (new_designator_) {
915 deletions_.push_back(arg.get());
916 args_new.emplace_back(new_designator_);
917 new_designator_ =
nullptr;
919 args_new.emplace_back(arg.get());
925 void operator()(Const<ast::UnaryExpr> &e)
override {
927 if (new_designator_) {
929 deletions_.push_back(e.expr.get());
930 const_cast<ast::UnaryExpr*
>(&e)->expr = std::unique_ptr<ast::Designator>(new_designator_);
931 new_designator_ =
nullptr;
935 void operator()(Const<ast::BinaryExpr> &e)
override {
937 if (new_designator_) {
939 deletions_.push_back(e.lhs.get());
940 const_cast<ast::BinaryExpr*
>(&e)->lhs = std::unique_ptr<ast::Designator>(new_designator_);
941 new_designator_ =
nullptr;
944 if (new_designator_) {
946 deletions_.push_back(e.rhs.get());
947 const_cast<ast::BinaryExpr*
>(&e)->rhs = std::unique_ptr<ast::Designator>(new_designator_);
948 new_designator_ =
nullptr;
952 void operator()(Const<ast::QueryExpr>&)
override { }
956 for (
auto e : *origin_) {
957 if (
auto d = cast<const ast::Designator>(e)) {
971 std::ostringstream oss;
982struct m::GetCorrelationInfo : ast::ConstASTExprVisitor
984 friend struct Decorrelation;
987 struct CorrelationInfo : ast::ASTExprVisitor
990 std::set<const ast::Expr*> uncorrelated_exprs;
991 std::set<const char*> sources;
993 const char *table_name_;
994 std::unordered_map<const ast::Designator*, std::unique_ptr<ast::Designator>> old_to_new_outer_;
995 std::unordered_map<const ast::Designator*, std::unique_ptr<ast::Designator>> old_to_new_inner_;
996 std::unordered_set<const ast::Designator*> replaced_designators_;
997 bool is_decorrelation_finished_ =
false;
1008 const char *table_name, std::vector<const ast::Expr*> &expansion) {
1009 table_name_ = table_name;
1010 old_to_new_outer_ = std::move(old_to_new);
1012 for (
auto pred : clause)
1015 std::set<const ast::Expr*> uncorrelated_exprs_new;
1016 for (
auto e : uncorrelated_exprs) {
1017 if (
auto d = cast<const ast::Designator>(e)) {
1018 auto d_new = get_new(d);
1020 uncorrelated_exprs_new.emplace(old_to_new_inner_.at(d_new));
1023 replaced_designators_.emplace(d_new);
1024 }
catch (std::out_of_range&) {
1028 uncorrelated_exprs_new.emplace(d_new);
1031 uncorrelated_exprs_new.emplace(e);
1034 uncorrelated_exprs = std::move(uncorrelated_exprs_new);
1035 old_to_new_inner_.clear();
1037 for (
auto d : replaced_designators_)
1038 expansion.push_back(d);
1039 replaced_designators_.clear();
1043 void finish_decorrelation() {
1044 is_decorrelation_finished_ =
true;
1045 for (
auto &p : clause)
1050 using ASTExprVisitor::operator();
1051 void operator()(Const<ast::ErrorExpr>&)
override {
M_unreachable(
"graph must not contain errors"); }
1053 void operator()(Const<ast::Designator> &d)
override {
if (is_decorrelation_finished_) d.set_bound(); }
1055 void operator()(Const<ast::Constant>&)
override { }
1057 void operator()(Const<ast::FnApplicationExpr>&)
override { }
1059 void operator()(Const<ast::UnaryExpr> &e)
override {
1060 if (is_decorrelation_finished_) {
1063 if (
auto d = cast<ast::Designator>(e.expr.get()); d
and contains(uncorrelated_exprs, d)) {
1065 e.expr = std::unique_ptr<ast::Expr>(table_name_ ? set_table_name(get_new(d)) : get_new(d));
1066 replaced_designators_.emplace(d);
1073 void operator()(Const<ast::BinaryExpr> &e)
override {
1074 if (is_decorrelation_finished_) {
1078 if (
auto d = cast<ast::Designator>(e.lhs.get()); d
and contains(uncorrelated_exprs, d)) {
1080 e.lhs = std::unique_ptr<ast::Expr>(table_name_ ? set_table_name(get_new(d)) : get_new(d));
1081 replaced_designators_.emplace(d);
1085 if (
auto d = cast<ast::Designator>(e.rhs.get()); d
and contains(uncorrelated_exprs, d)) {
1087 e.rhs = std::unique_ptr<ast::Expr>(table_name_ ? set_table_name(get_new(d)) : get_new(d));
1088 replaced_designators_.emplace(d);
1095 void operator()(Const<ast::QueryExpr>&)
override { }
1098 if (
auto it = old_to_new_outer_.find(d); it != old_to_new_outer_.end())
1104 if (
auto it = old_to_new_outer_.find(d); it != old_to_new_outer_.end())
1116 old_to_new_inner_.emplace(d, D);
1122 std::vector<CorrelationInfo*> equi_;
1123 std::vector<CorrelationInfo*> non_equi_;
1125 CorrelationInfo *info_;
1127 std::vector<const ast::Expr*> expansion_;
1130 GetCorrelationInfo() { }
1131 ~GetCorrelationInfo() {
1132 for (
auto d : expansion_)
1140 for (
auto &c : cnf) {
1141 info_ =
new CorrelationInfo(c);
1142 bool is_equi = c.size() <= 1;
1146 is_equi = is_equi
and is_not_equal(&p.expr());
1148 is_equi = is_equi
and is_equal(&p.expr());
1150 if (info_->sources.empty()) {
1157 equi_.push_back(info_);
1159 non_equi_.push_back(info_);
1166 std::vector<CorrelationInfo*> getEqui() {
return std::move(equi_); }
1168 std::vector<CorrelationInfo*> getNonEqui() {
return std::move(non_equi_); }
1170 bool empty() {
return equi_.empty()
and non_equi_.empty(); }
1171 bool non_equi() {
return not non_equi_.empty(); }
1174 using ConstASTExprVisitor::operator();
1175 void operator()(Const<ast::ErrorExpr>&)
override {
M_unreachable(
"graph must not contain errors"); }
1177 void operator()(Const<ast::Designator> &e)
override {
1178 if (e.contains_free_variables()) {
1179 auto names = find_table_name(e);
1180 info_->sources.insert(names.begin(), names.end());
1182 info_->uncorrelated_exprs.emplace(&e);
1186 void operator()(Const<ast::Constant>&)
override { }
1188 void operator()(Const<ast::FnApplicationExpr> &e)
override { info_->uncorrelated_exprs.emplace(&e); }
1190 void operator()(Const<ast::UnaryExpr> &e)
override { (*this)(*e.expr); }
1192 void operator()(Const<ast::BinaryExpr> &e)
override { (*this)(*e.lhs); (*this)(*e.rhs); }
1194 void operator()(Const<ast::QueryExpr>&)
override { }
1197 if (
auto b = cast<const ast::BinaryExpr>(e))
1198 return b->op() == TK_EQUAL;
1204 if (
auto b = cast<const ast::BinaryExpr>(e))
1205 return b->op() == TK_BANG_EQUAL;
1212 if (std::holds_alternative<const Attribute*>(d.
target())) {
1213 return {std::get<const Attribute *>(d.
target())->table.name};
1214 }
else if (std::holds_alternative<const ast::Expr*>(d.
target())) {
1216 GD(*std::get<const ast::Expr*>(d.
target()));
1217 std::set<const char*>
res;
1218 for (
auto des : GD.get()) {
1220 if (
auto d = cast<const ast::Designator>(des)) {
1221 auto names = find_table_name(*d);
1222 res.insert(names.begin(), names.end());
1235struct m::Decorrelation
1239 using graphs_t = std::pair<QueryGraph*, Query*>;
1246 std::vector<graphs_t> correlated_nested_queries_;
1247 std::vector<const ast::Expr*> needed_attrs_;
1253 , correlated_nested_queries_({{graph,
nullptr}})
1258 bool decorrelate() {
1260 find_correlated_nested_graphs(query_.
query_graph(), &query_);
1264 while (correlated_nested_queries_.size() > 1) {
1265 auto last = correlated_nested_queries_.back();
1266 if (last.first->correlation_info_->non_equi()
and last.first->grouping())
break;
1267 correlated_nested_queries_.pop_back();
1268 push_predicates(correlated_nested_queries_.back().first, last.second);
1271 M_insist(not correlated_nested_queries_.empty());
1273 if (correlated_nested_queries_.size() > 1) {
1275 needed_attrs_ = get_needed_attrs(graph_, query_);
1277 for (
auto des : needed_attrs_) {
1284 for (
auto it = correlated_nested_queries_.begin(); it != std::prev(correlated_nested_queries_.end());) {
1288 pushOperators(lower.first, upper.first, lower.second);
1292 auto *origin = &correlated_nested_queries_.back().first->group_by_;
1294 for (
auto i = correlated_nested_queries_.size() - 1; i != 0; --i) {
1295 auto &upper = correlated_nested_queries_[i - 1];
1297 replace_designators(upper.first, origin);
1298 if (upper.first->grouping())
1299 origin = &upper.first->group_by_;
1303 auto iterator = correlated_nested_queries_.end();
1304 for (
auto it = correlated_nested_queries_.begin(); it != correlated_nested_queries_.end(); ++it) {
1305 if (not (*it).first->correlation_info_->empty()) {
1310 if (iterator == correlated_nested_queries_.end()) {
1316 for (
auto it = correlated_nested_queries_.begin(); it != iterator;)
1317 (*++it).second->decorrelated_ =
false;
1326 correlated_nested_queries_.emplace_back(graph, query);
1329 for (
auto ds : graph->
sources()) {
1330 if (ds->decorrelated_)
continue;
1331 M_insist(not found,
"Multiple not yet decorrelated nested queries in one graph should not be possible.");
1333 auto q = as<Query>(ds);
1334 find_correlated_nested_graphs(
q->query_graph(),
q);
1346 const bool inner_has_projection = not inner_graph->
projections().empty();
1347 auto grouping = inner_graph->
grouping();
1348 auto equi = inner_graph->correlation_info_->getEqui();
1349 auto non_equi = inner_graph->correlation_info_->getNonEqui();
1350 inner_graph->correlation_info_->equi_.clear();
1351 inner_graph->correlation_info_->non_equi_.clear();
1352 bool is_equi =
true;
1353 M_insist(non_equi.empty() or not grouping);
1354 for (
auto it = equi.begin(); it != non_equi.end(); ++it) {
1355 if (it == equi.end()) {
1356 it = non_equi.begin();
1358 if (it == non_equi.end())
break;
1360 auto correlation_info = *it;
1362 std::unordered_map<const ast::Designator*, std::unique_ptr<ast::Designator>> old_to_new;
1363 for (
auto e : correlation_info->uncorrelated_exprs) {
1368 if (
auto d = cast<const ast::Designator>(e)) {
1371 std::ostringstream oss;
1375 auto d_new = std::make_unique<ast::Designator>(attr,
ast::Token(), attr, as<const PrimitiveType>(d->type())->as_scalar(), d);
1376 auto res = old_to_new.emplace(d, std::move(d_new));
1377 e_new =
res.first->second.get();
1380 if (inner_has_projection)
1383 std::pair<const ast::Expr*, const char*>(e_new,
nullptr)
1389 correlation_info->replace(old_to_new, query->
alias(), inner_graph->correlation_info_->expansion_);
1391 std::vector<DataSource*> sources{query};
1392 auto all_reachable =
true;
1393 for (
auto s : correlation_info->sources) {
1394 if (
auto src = findSource(outer_graph->
sources(), s)) {
1398 all_reachable =
false;
1403 if (all_reachable) {
1404 correlation_info->finish_decorrelation();
1405 if (
auto j = findJoin(outer_graph->
joins(), sources)) {
1407 j->update_condition(
cnf::CNF({correlation_info->clause}));
1410 auto J = outer_graph->
joins_.emplace_back(
new Join(
cnf::CNF({correlation_info->clause}), sources));
1411 for (
auto ds : J->sources())
1414 delete correlation_info;
1418 outer_graph->correlation_info_->equi_.push_back(correlation_info);
1420 outer_graph->correlation_info_->non_equi_.push_back(correlation_info);
1435 auto lower_sources = lower->
sources();
1438 for (
auto j : query->
joins()) {
1441 for (
auto ds : j->sources())
1448 size_t num_sources = lower->
sources().size();
1449 for (
auto it = upper->
sources_.begin(); it != upper->
sources_.end(); ++it) {
1450 if (*it == query)
continue;
1451 (*it)->id_ = num_sources++;
1455 M_insist(not primary_key.empty(),
"can not push-up grouping above source without primary key");
1468 for (
auto attr : needed_attrs_)
1472 auto equi = lower->correlation_info_->getEqui();
1473 auto non_equi = lower->correlation_info_->getNonEqui();
1474 lower->correlation_info_->equi_.clear();
1475 lower->correlation_info_->non_equi_.clear();
1476 bool is_equi =
true;
1477 for (
auto it = equi.begin(); it != non_equi.end(); ++it) {
1478 if (it == equi.end()) {
1479 it = non_equi.begin();
1481 if (it == non_equi.end())
break;
1486 std::vector<DataSource*> sources;
1487 auto all_reachable =
true;
1488 for (
auto e : i->uncorrelated_exprs) {
1489 if (
auto d = cast<const ast::Designator>(e)) {
1490 if (
auto src = findSource(lower->
sources(), d->get_table_name())) {
1494 all_reachable =
false;
1499 sources = lower_sources;
1503 for (
auto s : i->sources) {
1504 if (
auto src = findSource(lower->
sources(), s)) {
1508 all_reachable =
false;
1513 if (all_reachable) {
1514 i->finish_decorrelation();
1515 if (
auto j = findJoin(lower->
joins(), sources)) {
1517 j->update_condition(
cnf::CNF({i->clause}));
1521 for (
auto ds : J->sources())
1528 lower->correlation_info_->equi_.push_back(i);
1530 lower->correlation_info_->non_equi_.push_back(i);
1537 DataSource * findSource(
const std::vector<DataSource*> &sources,
const char *name) {
1538 for (
auto s : sources) {
1539 if (
auto q = cast<Query>(s); name == s->name() or (
q and findSource(
q->query_graph()->sources(), name)))
1547 Join * findJoin(
const std::vector<Join*> &joins,
const std::vector<DataSource*> &sources) {
1548 std::unordered_set<DataSource*> cmp(sources.begin(), sources.end());
1549 for (
auto j : joins) {
1550 if (cmp == std::unordered_set<DataSource*>(j->sources().begin(), j->sources().end()))
1557 void replace_designators(
QueryGraph *graph,
const std::vector<const ast::Expr*> *origin) {
1559 ReplaceDesignators RP;
1560 auto ds = *graph->
sources().begin();
1561 auto filter_new = ds->filter();
1562 auto expansions = RP.replace(filter_new, origin);
1563 ds->filter_ = filter_new;
1564 graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1566 expansions = RP.replace(graph->
group_by_, origin);
1567 graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1568 expansions = RP.replace(graph->
aggregates_, origin);
1569 graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1571 graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1574 graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1594 needs_grouping_ =
bool(stmt.
group_by) or graph_->aggregates().size() > 0;
1597 if (stmt.group_by) {
1598 auto &group_by = as<const ast::GroupByClause>(*stmt.
group_by);
1599 for (
auto &[grp, alias] : group_by.group_by)
1600 existing_grouping_keys_.emplace_back(*grp);
1608 auto &FROM = as<ast::FromClause>(*stmt.
from);
1609 for (
auto &tbl : FROM.from) {
1610 if (
auto tok = std::get_if<ast::Token>(&tbl.source)) {
1614 named_sources_.emplace(base.name(), base);
1615 }
else if (
auto stmt = std::get_if<ast::Stmt*>(&tbl.source)) {
1616 M_insist(tbl.alias.text.has_value(),
"every nested statement requires an alias");
1617 auto &select = as<ast::SelectStmt>(**stmt);
1620 auto &Q = graph_->
add_source(tbl.alias.text, std::move(graph));
1622 named_sources_.emplace(tbl.alias.text, Q);
1634 auto &GROUP_BY = as<ast::GroupByClause>(*stmt.
group_by);
1635 for (
auto &[grp, alias] : GROUP_BY.group_by)
1636 graph_->
group_by_.emplace_back(*grp, alias.text);
1640 auto SELECT = as<ast::SelectClause>(stmt.
select.get());
1641 for (
auto &e :
SELECT->expanded_select_all)
1645 for (
auto &s :
SELECT->select) {
1646 if (
auto query = cast<const ast::QueryExpr>(s.first))
1647 nested_queries_in_select.emplace_back(*query, s.second.text);
1649 graph_->
projections_.emplace_back(*s.first, s.second.text);
1655 auto &WHERE = as<ast::WhereClause>(*stmt.
where);
1659 for (
auto &clause : cnf_where)
1660 process_selection(clause);
1663 for (
auto e : additional_grouping_keys_) {
1668 for (
auto &[clause, CI] : bound_clauses_) {
1669 if (CI.is_constant()) {
1670 for (
auto &[_, src] : named_sources_)
1671 src.get().update_filter(
cnf::CNF{std::move(clause)});
1672 }
else if (CI.is_selection()) {
1673 auto it = named_sources_.find(*CI.data_sources.begin());
1674 M_insist(it != named_sources_.end(),
"data source with that name was not found");
1675 it->second.get().update_filter(
cnf::CNF{std::move(clause)});
1678 for (
auto src_name : CI.data_sources) {
1679 auto it = named_sources_.find(src_name);
1680 M_insist(it != named_sources_.end(),
"data source with that name was not found");
1681 sources.emplace_back(it->second);
1683 auto J = std::make_unique<Join>(
cnf::CNF{std::move(clause)}, std::move(sources));
1684 auto &ref = *graph_->
joins_.emplace_back(std::move(J));
1685 for (
auto ds : ref.sources())
1686 ds.get().add_join(ref);
1695 for (
auto &clause : cnf_where) {
1696 const auto nested_queries = get_nested_queries(clause);
1697 const auto tables = get_tables(clause);
1698 if (nested_queries.empty()) {
1700 if (tables.empty()) {
1703 for (
auto [_, source] : named_sources_)
1706 }
else if (tables.size() == 1) {
1708 auto t = *begin(tables);
1709 auto ds = named_sources_.at(t);
1717 for (
auto t : tables)
1718 sources.emplace_back(named_sources_.at(t));
1725 M_insist(nested_queries.size() == 1,
"Multiple nested queries in one clause are not supported.");
1726 auto &nested_query = nested_queries.front().get();
1727 auto &nested_stmt = as<const ast::SelectStmt>(*nested_query.query);
1728 auto &nested_select_clause = as<ast::SelectClause>(*nested_stmt.select);
1729 M_insist(not nested_select_clause.select_all,
"* can not yet be used in nested queries.");
1730 M_insist(nested_select_clause.select.size() == 1,
"only a single projection allowed");
1733 auto &single_projection = nested_select_clause.select.front();
1734 single_projection.second.text = C.
pool(
"$res");
1737 auto nested_query_name = nested_query.alias();
1742 if (tables.empty()) {
1746 M_insist(not named_sources_.empty(),
"cannot join nested query to any source");
1748 auto &any_data_source = *named_sources_.begin();
1749 auto &J = graph_->
joins_.emplace_back(
new Join(
cnf::CNF({clause}), {Q, any_data_source.second}));
1751 any_data_source.second.get().add_join(*J);
1756 for (
auto t : tables)
1757 sources.emplace_back(named_sources_.at(t));
1759 for (
auto ds : J->sources())
1760 ds.get().add_join(*J);
1763 named_sources_.emplace(nested_query_name, Q);
1766 if (Q.is_correlated())
1776 auto having_clause = as<ast::HavingClause>(stmt.
having.get());
1778 auto sub_graph = std::exchange(graph_, std::make_unique<QueryGraph>());
1780 sub.update_filter(cnf_having);
1781 named_sources_.emplace(C.
pool(
"HAVING"), sub);
1783 std::swap(graph_->
projections_, sub.query_graph().projections_);
1785 if (sub.is_correlated())
1786 sub.decorrelated_ =
false;
1791 for (
auto &clause : cnf_having) {
1793 auto &nested_queries = CI.nested_queries;
1794 if (nested_queries.empty()) {
1798 M_insist(nested_queries.size() == 1,
"Multiple nested queries in one clause are not yet supported.");
1799 auto &nested_query = nested_queries.front().expr;
1800 auto &nested_stmt = as<const ast::SelectStmt>(*nested_query.query);
1801 auto &nested_select_clause = as<ast::SelectClause>(*nested_stmt.select);
1802 M_insist(not nested_select_clause.select_all,
"* can not yet be used in nested queries.");
1803 M_insist(nested_select_clause.select.size() == 1);
1806 nested_select_clause.select.front().second.text = C.
pool(
"$res");
1809 auto &q_name = nested_query.alias();
1811 named_sources_.emplace(q_name, Q);
1814 auto &J = graph_->
joins_.emplace_back(
new Join(
cnf::CNF({clause}), {Q, named_sources_.at(C.
pool(
"HAVING"))}));
1815 for (
auto ds : J->sources())
1816 ds.get().add_join(*J);
1822 if (not nested_queries_in_select.empty()
and not stmt.
having and
1825 auto sub_graph = std::exchange(graph_, std::make_unique<QueryGraph>());
1827 named_sources_.emplace(C.
pool(
"SELECT"), sub);
1829 std::swap(graph_->
projections_, sub.query_graph().projections_);
1831 if (sub.is_correlated())
1832 sub.decorrelated_ =
false;
1837 for (
auto [_query, name] : nested_queries_in_select) {
1838 auto &query = _query.get();
1840 auto &select = as<ast::SelectStmt>(*query.query);
1841 auto &select_clause = as<ast::SelectClause>(*select.select);
1842 M_insist(not select_clause.select_all,
"* can not yet be used in nested queries.");
1843 M_insist(select_clause.select.size() == 1);
1845 select_clause.select.front().second.text = C.
pool(
"$res");
1848 auto &q_name = query.
alias();
1849 auto &Q = graph_->
add_source(q_name, std::move(sub));
1851 if (not named_sources_.empty()) {
1855 source = &named_sources_.at(C.
pool(
"SELECT")).get();
1856 }
catch (std::out_of_range&) {
try {
1857 source = &named_sources_.at(C.
pool(
"HAVING")).get();
1858 }
catch (std::out_of_range&) {
1859 source = &named_sources_.begin()->second.get();
1865 named_sources_.emplace(q_name, Q);
1871 auto ORDER_BY = as<ast::OrderByClause>(stmt.
order_by.get());
1872 for (
auto &o : ORDER_BY->order_by)
1873 graph_->
order_by_.emplace_back(*o.first, o.second);
1878 auto LIMIT = as<ast::LimitClause>(stmt.
limit.get());
1880 graph_->
limit_.
limit = strtoull(*(LIMIT->limit.text),
nullptr, 0);
1882 if (LIMIT->offset) {
1883 graph_->
limit_.
offset = strtoull(*(LIMIT->offset.text),
nullptr, 0);
1902 return builder.
get();
1920 for (
auto &join :
joins()) {
1921 if (join->sources().size() != 2)
1922 throw std::invalid_argument(
"building adjacency matrix for non-binary join");
1924 auto i = join->sources()[0].get().id();
1925 auto j = join->sources()[1].get().id();
1932 out <<
"graph query_graph\n{\n"
1933 <<
" forcelabels=true;\n"
1934 <<
" overlap=false;\n"
1935 <<
" labeljust=\"l\";\n"
1936 <<
" graph [compound=true];\n"
1937 <<
" graph [fontname = \"DejaVu Sans\"];\n"
1938 <<
" node [fontname = \"DejaVu Sans\"];\n"
1939 <<
" edge [fontname = \"DejaVu Sans\"];\n";
1943 out <<
'}' << std::endl;
1948#define q(X) '"' << X << '"'
1949#define id(X) q(std::hex << &X << std::dec)
1951 if (
auto q = cast<Query>(ds.get()))
1952 q->query_graph().dot_recursive(out);
1955 out <<
"\n subgraph cluster_" <<
this <<
" {\n";
1958 out <<
" " <<
id(*ds) <<
" [label=<";
1959 out <<
"<B>" << ds->name() <<
"</B>";
1960 if (ds->filter().size())
1961 out <<
"<BR/><FONT COLOR=\"0.0 0.0 0.25\" POINT-SIZE=\"10\">"
1964 out <<
">,style=filled,fillcolor=\"0.0 0.0 0.8\"];\n";
1965 if (
auto q = cast<Query>(ds.get()))
1966 out <<
" " <<
id(*ds) <<
" -- \"cluster_" << &
q->query_graph() <<
"\";\n";
1969 for (
auto &j :
joins()) {
1970 out <<
" " <<
id(*j) <<
" [label=<" <<
html_escape(
to_string(j->condition())) <<
">,style=filled,fillcolor=\"0.0 0.0 0.95\"];\n";
1971 for (
auto ds : j->sources())
1972 out <<
" " <<
id(*j) <<
" -- " <<
id(ds.get()) <<
";\n";
1976 <<
"<TABLE BORDER=\"0\" CELLPADDING=\"0\" CELLSPACING=\"0\">\n";
1980 out <<
" <TR><TD ALIGN=\"LEFT\">\n"
1981 <<
" <B>λ</B><FONT POINT-SIZE=\"9\">"
1989 out <<
" <TR><TD ALIGN=\"LEFT\">\n"
1990 <<
" <B>ω</B><FONT POINT-SIZE=\"9\">";
1992 if (it !=
order_by_.begin()) out <<
", ";
1994 out <<
' ' << (it->second ?
"ASC" :
"DESC");
2002 out <<
" <TR><TD ALIGN=\"LEFT\">\n"
2003 <<
" <B>π</B><FONT POINT-SIZE=\"9\">";
2008 if (it->second.has_value())
2017 out <<
" <TR><TD ALIGN=\"LEFT\">\n"
2018 <<
" <B>γ</B><FONT POINT-SIZE=\"9\">";
2020 if (it !=
group_by_.begin()) out <<
", ";
2022 if (it->second.has_value())
2035 out <<
" </TABLE>\n"
2046 out <<
';' << std::endl;
2052 out <<
"QueryGraph {\n sources:";
2057 if (
auto q = cast<Query>(src.get())) {
2060 auto bt = as<BaseTable>(*src);
2061 out << bt.table().name();
2063 if (src->alias().has_value())
2064 out <<
" AS " << src->alias();
2065 if (not src->filter().empty())
2066 out <<
" WHERE " << src->filter();
2070 if (
joins().empty()) {
2071 out <<
"\n no joins";
2074 for (
auto &j :
joins()) {
2076 auto &srcs = j->sources();
2077 for (
auto it = srcs.begin(), end = srcs.end(); it != end; ++it) {
2078 if (it != srcs.begin()) out <<
' ';
2079 if (it->get().alias().has_value())
2080 out << it->get().alias();
2081 else if (
auto bt = cast<const BaseTable>(&it->get()))
2086 out <<
"} " << j->condition();
2092 out <<
"\n no grouping";
2095 out <<
"\n group by: ";
2096 for (
auto it =
group_by().begin(), end =
group_by().end(); it != end; ++it) {
2099 out << it->first.get();
2100 if (it->second.has_value())
2101 out <<
" AS " << it->second;
2105 out <<
"\n aggregates: ";
2116 out <<
"\n no order";
2118 out <<
"\n order by: ";
2119 for (
auto it =
order_by().begin(), end =
order_by().end(); it != end; ++it) {
2122 out << it->first.get() <<
' ' << (it->second ?
"ASC" :
"DESC");
2127 out <<
"\n projections: ";
2129 if (alias.has_value()) {
2130 out <<
"\n AS " << alias;
2139 out <<
"\n}" << std::endl;
void emplace_back(std::vector< const ast::Expr * > &exprs, const ast::Designator *des)
Like std::vector::emplace_back() but adds only iff des is not already contained in exprs.
void insert(std::vector< const ast::Expr * > &exprs, const std::vector< const ast::Designator * > &insertions)
Like std::vector::insert() but adds only those elements of insertions which are not already contained...
void emplace_back_unique(std::vector< std::pair< const ast::Expr *, PooledOptionalString > > &pairs, const std::pair< const ast::Designator *, PooledOptionalString > &pair)
Like std::vector::emplace_back() but adds only iff pair is not already contained in pairs.
auto get_primary_key(DataSource *source)
Helper structure to compute and provide primary keys.
bool is_composable_of(const ast::Expr &expr, const std::vector< std::reference_wrapper< const ast::Expr > > components)
Computes whether the bound parts of expr are composable of elements in components.
std::vector< std::reference_wrapper< const ast::FnApplicationExpr > > get_aggregates(const ast::SelectStmt &stmt)
Given a SelectStmt stmt, extract the aggregates to compute while grouping.
#define SELECT(XXX, _1, _2, _3, FN,...)
#define M_unreachable(MSG)
CNF M_EXPORT to_CNF(const ast::Expr &e)
Converts the Boolean Expr e to a CNF.
M_EXPORT const version_info & get()
PrimitiveExpr replace(U &&_value)
for(std::size_t idx=1;idx< num_vectors;++idx) res.emplace((vectors_[idx].bitmask()<< uint32_t(idx *vector_type return * res
std::pair<::wasm::Expression *, std::list< std::shared_ptr< Bit > > > move()
Moves the underlying Binaryen ::wasm::Expression and the referenced bits out of this.
bool M_EXPORT equal(const T &first, const U &second)
Checks whether first and second are equal considering permutations.
bool streq(const char *first, const char *second)
bool M_EXPORT contains(const H &haystack, const N &needle)
Checks whether haystack contains needle.
std::string M_EXPORT html_escape(std::string str)
Escapes special characters in a string to be printable in HTML documents.
std::string to_string(const TokenType tt)
auto visit(Callable &&callable, Base &obj, m::tag< Callable > &&=m::tag< Callable >())
Generic implementation to visit a class hierarchy, with similar syntax as std::visit.
Helper structure to extract the QueryExprs in an expression.
void operator()(Const< ast::Constant > &) override
void operator()(Const< ast::Designator > &) override
void operator()(Const< ast::QueryExpr > &e) override
void operator()(Const< ast::ErrorExpr > &) override
void operator()(Const< ast::UnaryExpr > &e) override
void operator()(Const< ast::FnApplicationExpr > &) override
std::vector< const ast::QueryExpr * > get()
std::vector< const ast::QueryExpr * > queries_
void operator()(Const< ast::BinaryExpr > &e) override
The catalog contains all Databases and keeps track of all meta information of the database system.
ThreadSafePooledString pool(const char *str) const
Creates an internalized copy of the string str by adding it to the internal StringPool.
static Catalog & Get()
Return a reference to the single Catalog instance.
Collects information of a cnf::Clause.
ClauseInfo(const cnf::Clause &clause)
Collects correlation information of all cnf::Clauses occurring in the QueryGraph.
std::unordered_set< ThreadSafePooledString > data_sources
std::vector< SubqueryInfo > nested_queries
A DataSource in a QueryGraph.
std::size_t id_
unique identifier of this data source within its query graph
virtual ThreadSafePooledOptionalString name() const =0
Returns the name of this DataSource.
const ThreadSafePooledOptionalString & alias() const
Returns the alias of this DataSource.
const auto & joins() const
Returns a reference to the Joins using this DataSource.
void update_filter(cnf::CNF filter)
Adds filter to the current filter of this DataSource by logical conjunction.
void add_join(Join &join)
Adds join to the set of Joins of this DataSource.
bool decorrelated_
indicates whether this source is already decorrelated
std::unique_ptr< QueryGraph > graph_
the query graph that is being constructed
std::unordered_map< ThreadSafePooledString, std::reference_wrapper< DataSource > > named_sources_
maps DataSource names/aliases to the DataSource instance
clause_map deferred_clauses_
to be handled by an outer query
std::vector< std::reference_wrapper< const ast::Expr > > existing_grouping_keys_
the grouping keys of the statement
void operator()(Const< ast::Stmt > &s)
std::unordered_set< std::reference_wrapper< const ast::Expr > > additional_grouping_keys_
additionally required grouping keys to perform decorrelation
clause_map bound_clauses_
to be handled by the current query
bool needs_grouping_
whether this graph needs grouping; either by explicily grouping or implicitly by using aggregations
std::unique_ptr< QueryGraph > get()
returns the constructed QueryGraph
void process_selection(cnf::Clause &clause)
Computes correlation information of clause.
A Join in a QueryGraph combines DataSources by a join condition.
std::vector< std::reference_wrapper< DataSource > > sources_t
A data type representing a pooled (or internalized) object.
Translates a query graph in SQL.
The query graph represents all data sources and joins in a graph structure.
std::vector< order_type > order_by_
the order
struct m::QueryGraph::@0 limit_
limit: limit and offset
std::vector< std::unique_ptr< Join > > joins_
collection of all joins in this query graph
const auto & group_by() const
void add_source(std::unique_ptr< DataSource > source)
bool is_correlated() const
Returns true iff the graph is correlated, i.e.
const std::vector< projection_type > & projections() const
bool grouping() const
Returns true iff the graph contains a grouping.
const auto & joins() const
std::size_t num_sources() const
Returns the number of DataSources in this graph.
void dot(std::ostream &out) const
Translates the query graph to dot.
void compute_adjacency_matrix() const
const auto & aggregates() const
void sql(std::ostream &out) const
Translates the query graph to SQL.
void remove_join(Join &join)
const auto & order_by() const
std::vector< std::reference_wrapper< const ast::FnApplicationExpr > > aggregates_
the aggregates to compute
const auto & sources() const
std::vector< projection_type > projections_
the data to compute
void dot_recursive(std::ostream &out) const
std::unique_ptr< AdjacencyMatrix > adjacency_matrix_
std::vector< group_type > group_by_
the grouping keys
std::vector< std::unique_ptr< DataSource > > sources_
collection of all data sources in this query graph
static std::unique_ptr< QueryGraph > Build(const ast::Stmt &stmt)
A Query in a QueryGraph is a DataSource that represents a nested query.
bool is_correlated() const override
Returns true iff the data source is correlated.
std::unique_ptr< QueryGraph > query_graph_
query graph of the sub-query
QueryGraph & query_graph() const
Returns a reference to the internal QueryGraph.
Dumps a textual representation of the AST to an output stream.
ThreadSafePooledString get_table_name() const
bool has_explicit_table_name() const
bool has_table_name() const
const target_type & target() const
const Type * type() const
Returns the Type of this Expr.
A query expression for nested queries.
std::unique_ptr< Clause > from
std::unique_ptr< Clause > select
std::unique_ptr< Clause > where
std::unique_ptr< Clause > order_by
std::unique_ptr< Clause > having
std::unique_ptr< Clause > group_by
std::unique_ptr< Clause > limit
A unary expression: "+e", "-e", "~e", "NOT e".
A CNF represents a conjunction of cnf::Clauses.
A cnf::Clause represents a disjunction of Predicates.
static Predicate Create(const ast::Expr *e, bool is_negative)
Creates a Predicate from e.
Signals that an argument to a function of method was invalid.
Exception class which can be thrown to skip recursion of the subtree in pre-order visitors.