8#include <unordered_map>
15std::unique_ptr<DatabaseCommand>
Sema::analyze(std::unique_ptr<ast::Command> ast)
35 Token(tok.
pos, std::move(name), TK_IDENTIFIER));
36 new_designator->type_ = target.
type();
37 new_designator->target_ = ⌖
38 return new_designator;
45 std::unique_ptr<Designator> new_designator;
46 if (
auto d = cast<const Designator>(&name)) {
48 new_designator = std::make_unique<Designator>(d->tok, std::move(table_name), d->attr_name);
53 new_designator = std::make_unique<Designator>(std::move(tok));
56 new_designator->type_ = target.
type();
57 new_designator->target_ = ⌖
58 return new_designator;
64 to_replace = std::move(new_designator);
73 context_stack_t::reverse_iterator binding_ctx)
75 if (current_ctx == binding_ctx)
return {};
78 for (
auto it = current_ctx; it != binding_ctx; ++it) {
79 if (it != current_ctx)
oss <<
'.';
80 M_insist((*it)->alias.has_value(),
"nested queries must have an alias");
85 return C.pool(
oss.str().c_str());
89 const std::vector<std::reference_wrapper<ast::Expr>> components)
93 return d.contains_free_variables() or d.is_identifier();
97 for (
auto &arg : e.args) {
107 [](
auto&) ->
bool {
return true; },
110 for (
auto c : components)
111 if (expr == c.get())
return true;
115void Sema::compose_of(std::unique_ptr<ast::Expr> &ptr,
const std::vector<std::reference_wrapper<ast::Expr>> components)
119 return d.contains_free_variables() or d.is_identifier();
123 for (
auto &arg : e.args) {
133 [](
auto&) ->
bool {
return true; },
136 for (
auto c : components) {
158 auto pooled_name = C.
pool(
oss.str().c_str());
162 auto [begin, end] = current_ctx->results.equal_range(pooled_name);
163 if (std::distance(begin, end) > 1) {
164 diag.
e(e.tok.pos) <<
"Designator " << e <<
" is ambiguous, multiple occurrences in SELECT clause.\n";
167 }
else if (std::distance(begin, end) == 1) {
168 SemaContext::result_t &result = begin->second;
169 if (
auto d = cast<Designator>(&result.expr()); d
and not result.alias.has_value())
170 e.table_name.text = d->table_name.text;
171 e.type_ = result.expr().type();
172 e.target_ = &result.expr();
179 auto [begin, end] = current_ctx->grouping_keys.equal_range(pooled_name);
180 if (std::distance(begin, end) > 1) {
181 diag.
e(e.tok.pos) <<
"Designator " << e <<
" is ambiguous, multiple occurrences in GROUP BY clause.\n";
184 }
else if (std::distance(begin, end) == 1) {
185 auto &referenced_expr = begin->second.get();
186 e.type_ = referenced_expr.type();
187 if (
auto pt = cast<const PrimitiveType>(e.type()))
188 e.type_ = pt->as_scalar();
190 M_insist(e.type()->is_error(),
"grouping expression must be of primitive type");
191 e.target_ = &referenced_expr;
197 decltype(
contexts_)::reverse_iterator found_ctx;
198 bool is_result =
false;
207 for (
auto end =
contexts_.rend(); it != end; ++it) {
209 src = (*it)->sources.at(e.table_name.text.assert_not_none()).first;
211 }
catch (std::out_of_range) {
217 diag.
e(e.table_name.pos) <<
"Source table " << e.table_name.text
218 <<
" not found. Maybe you forgot to specify it in the FROM clause?\n";
226 if (
auto ref = std::get_if<std::reference_wrapper<const Table>>(&src)) {
227 const Table &tbl = ref->get();
230 target = &tbl.
at(attr_name);
231 }
catch (std::out_of_range) {
232 diag.
e(e.attr_name.pos) <<
"Table " << e.table_name.text <<
" has no attribute " << attr_name <<
".\n";
236 }
else if (
auto T = std::get_if<SemaContext::named_expr_table>(&src)) {
239 auto [begin, end] = tbl.equal_range(attr_name);
241 diag.
e(e.attr_name.pos) <<
"Source " << e.table_name.text <<
" has no attribute " << attr_name <<
".\n";
244 }
else if (std::distance(begin, end) > 1) {
245 diag.
e(e.attr_name.pos) <<
"Source " << e.table_name.text <<
" has multiple attributes " << attr_name
250 target = &begin->second.first.get();
256 e.set_binding_depth(std::distance(
contexts_.rbegin(), found_ctx));
261 if (
auto [begin, end] = current_ctx->results.equal_range(attr_name);
265 if (std::distance(begin, end) > 1) {
266 diag.
e(e.attr_name.pos) <<
"Attribute specifier " << attr_name <<
" is ambiguous.\n";
270 M_insist(std::distance(begin, end) == 1);
271 SemaContext::result_t &result = begin->second;
272 e.target_ = &result.expr();
273 if (
auto d = cast<Designator>(&result.expr()); d
and d->attr_name.text == attr_name)
274 e.table_name.text = d->table_name.text;
275 e.set_binding_depth(0);
286 for (
auto &src : (*it)->sources) {
287 if (
auto ref = std::get_if<std::reference_wrapper<const Table>>(&src.second.first)) {
288 const Table &tbl = ref->get();
291 if (not std::holds_alternative<std::monostate>(target)) {
293 diag.
e(e.attr_name.pos) <<
"Attribute specifier " << attr_name <<
" is ambiguous.\n";
302 }
catch (std::out_of_range) {
305 }
else if (
auto T = std::get_if<SemaContext::named_expr_table>(&src.second.first)) {
307 auto [begin, end] = tbl.equal_range(attr_name);
310 }
else if (std::distance(begin, end) > 1) {
311 diag.
e(e.attr_name.pos) <<
"Attribute specifier " << attr_name <<
" is ambiguous.\n";
315 M_insist(std::distance(begin, end) == 1);
316 if (not std::holds_alternative<std::monostate>(target)) {
318 diag.
e(e.attr_name.pos) <<
"Attribute specifier " << attr_name <<
" is ambiguous.\n";
323 target = &begin->second.first.get();
333 if (not std::holds_alternative<std::monostate>(target))
338 if (std::holds_alternative<std::monostate>(target)) {
339 diag.
e(e.attr_name.pos) <<
"Attribute " << attr_name <<
" not found.\n";
345 e.table_name.text = alias;
346 e.set_binding_depth(std::distance(
contexts_.rbegin(), found_ctx));
354 const Type * operator()(std::monostate&)
const {
M_unreachable(
"target not set"); }
356 const Type * operator()(
const Expr *expr)
const {
return expr->type_; }
358 const PrimitiveType *pt = cast<const PrimitiveType>(std::visit(get_type(), e.target_));
365 for (
auto it =
contexts_.rbegin(); it != found_ctx; ++it) {
368 diag.
e(e.attr_name.pos) <<
"Correlated attributes are not allowed in the FROM clause.\n";
374 switch ((*found_ctx)->stage) {
380 diag.
e(e.attr_name.pos) <<
"Correlated attributes are not allowed in the FROM clause.\n";
405 case TK_STRING_LITERAL:
410 int year, month, day;
411 sscanf(*e.
tok.
text,
"d'%d-%d-%d'", &year, &month, &day);
413 diag.
e(e.
tok.
pos) << e <<
" has invalid year (after year -1 (1 BC) follows year 1 (1 AD)).\n";
417 if (month < 1 or month > 12) {
422 if (day < 1 or (month == 2
and day > 29)
423 or ((month == 4 or month == 6 or month == 9 or month == 11)
and day > 30)
424 or ((month == 1 or month == 3 or month == 5 or month == 7 or month == 8 or month == 10 or month == 12)
435 int year, month, day, hour, minute, second;
436 sscanf(*e.
tok.
text,
"d'%d-%d-%d %d:%d:%d'", &year, &month, &day, &hour, &minute, &second);
438 diag.
e(e.
tok.
pos) << e <<
" has invalid year (after year -1 (1 BC) follows year 1 (1 AD)).\n";
442 if (month < 1 or month > 12) {
447 if (day < 1 or (month == 2
and day > 29)
448 or ((month == 4 or month == 6 or month == 9 or month == 11)
and day > 30)
449 or ((month == 1 or month == 3 or month == 5 or month == 7 or month == 8 or month == 10 or month == 12)
463 diag.
e(e.
tok.
pos) << e <<
" has invalid minute.\n";
469 diag.
e(e.
tok.
pos) << e <<
" has invalid second.\n";
488 if (value == int32_t(value))
508 auto d = cast<Designator>(e.
fn.get());
509 if (not d or not d->is_identifier()) {
510 diag.
e(d->attr_name.pos) << *d <<
" is not a valid function.\n";
515 M_insist(not d->type_,
"This identifier has already been analyzed.");
518 for (
auto &arg : e.
args)
525 e.
func_ = DB.get_function(d->attr_name.text.assert_not_none());
526 }
catch (std::out_of_range) {
527 diag.
e(d->attr_name.pos) <<
"Function " << d->attr_name.text <<
" is not defined in database " << DB.name
535 }
catch (std::out_of_range) {
536 diag.
e(d->attr_name.pos) <<
"Function " << d->attr_name.text <<
" is not defined.\n";
550 diag.
e(d->attr_name.pos) <<
"User-defined functions are not yet supported.\n";
554 case Function::FN_MIN:
555 case Function::FN_MAX:
556 case Function::FN_SUM:
557 case Function::FN_AVG: {
558 if (e.
args.size() == 0) {
559 diag.
e(d->attr_name.pos) <<
"Missing argument for aggregate " << *d <<
".\n";
563 if (e.
args.size() > 1) {
564 diag.
e(d->attr_name.pos) <<
"Too many arguments for aggregate " << *d <<
".\n";
569 auto &arg = *e.
args[0];
570 if (arg.type()->is_error()) {
575 if (not arg.type()->is_numeric()) {
577 diag.
e(d->attr_name.pos) <<
"Argument of aggregate function must be of numeric type.\n";
582 const Numeric *arg_type = cast<const Numeric>(arg.type());
584 diag.
w(d->attr_name.pos) <<
"Argument of aggregate is not of vectorial type. "
585 "(Aggregates over scalars are discouraged.)\n";
592 case Function::FN_MIN:
593 case Function::FN_MAX: {
600 case Function::FN_AVG: {
607 case Function::FN_SUM: {
609 switch (arg_type->kind) {
614 case Numeric::N_Float:
618 case Numeric::N_Decimal:
630 case Function::FN_COUNT: {
631 if (e.
args.size() > 1) {
632 diag.
e(d->attr_name.pos) <<
"Too many arguments for aggregate " << *d <<
".\n";
644 case Function::FN_ISNULL: {
645 if (e.
args.size() == 0) {
646 diag.
e(d->attr_name.pos) <<
"Missing argument for aggregate " << *d <<
".\n";
650 if (e.
args.size() > 1) {
651 diag.
e(d->attr_name.pos) <<
"Too many arguments for aggregate " << *d <<
".\n";
656 auto &arg = *e.
args[0];
658 if (arg.type()->is_error()) {
662 const PrimitiveType *arg_type = cast<const PrimitiveType>(arg.type());
664 diag.
e(d->attr_name.pos) <<
"Function ISNULL can only be applied to expressions of primitive type.\n";
676 M_insist(d->type()->is_error() or cast<const FnType>(d->type()));
683 M_unreachable(
"Function application in FROM clause is impossible");
687 diag.
e(d->attr_name.pos) <<
"Aggregate functions are not allowed in WHERE clause.\n";
694 diag.
e(d->attr_name.pos) <<
"Aggregate functions are not allowed in GROUP BY clause.\n";
723 if (e.
expr->type()->is_error()) {
733 if (not e.
expr->type()->is_boolean()) {
734 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
" must be boolean.\n";
743 if (not e.
expr->type()->is_numeric()) {
744 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
" must be numeric.\n";
761 if (e.
lhs->type()->is_error() or e.
rhs->type()->is_error()) {
779 const Numeric *ty_lhs = cast<const Numeric>(e.
lhs->type());
780 const Numeric *ty_rhs = cast<const Numeric>(e.
rhs->type());
781 if (not ty_lhs or not ty_rhs) {
782 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
", operands must be of numeric type.\n";
796 auto ty_lhs = cast<const CharacterSequence>(e.
lhs->type());
797 auto ty_rhs = cast<const CharacterSequence>(e.
rhs->type());
798 if (not ty_lhs or not ty_rhs) {
799 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
", concatenation requires string operands.\n";
807 Type::category_t c = std::max(ty_lhs->
category, ty_rhs->category);
816 case TK_GREATER_EQUAL: {
817 if (
auto ty_lhs = cast<const Numeric>(e.
lhs->type())) {
819 auto ty_rhs = cast<const Numeric>(e.
rhs->type());
820 if (not ty_lhs or not ty_rhs) {
821 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
", both operands must be of numeric type.\n";
829 Type::category_t c = std::max(ty_lhs->
category, ty_rhs->category);
834 }
else if (
auto ty_lhs = cast<const CharacterSequence>(e.
lhs->type())) {
836 auto ty_rhs = cast<const CharacterSequence>(e.
rhs->type());
837 if (not ty_lhs or not ty_rhs) {
838 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
", both operands must be strings.\n";
846 Type::category_t c = std::max(ty_lhs->
category, ty_rhs->category);
850 }
else if (
auto ty_lhs = cast<const Date>(e.
lhs->type())) {
852 auto ty_rhs = cast<const Date>(e.
rhs->type());
853 if (not ty_lhs or not ty_rhs) {
854 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
", both operands must be dates.\n";
862 Type::category_t c = std::max(ty_lhs->
category, ty_rhs->category);
866 }
else if (
auto ty_lhs = cast<const DateTime>(e.
lhs->type())) {
868 auto ty_rhs = cast<const DateTime>(e.
rhs->type());
869 if (not ty_lhs or not ty_rhs) {
870 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
", both operands must be datetimes.\n";
878 Type::category_t c = std::max(ty_lhs->
category, ty_rhs->category);
883 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
", operator not supported for given operands.\n";
891 case TK_BANG_EQUAL: {
893 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
", operands are incomparable.\n";
905 if (
auto ty_lhs = cast<const Numeric>(e.
lhs->type()))
911 auto ty_lhs = cast<const CharacterSequence>(e.
lhs->type());
912 auto ty_rhs = cast<const CharacterSequence>(e.
rhs->type());
913 if (not ty_lhs or not ty_rhs) {
914 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
", operands must be character sequences.\n";
922 Type::category_t c = std::max(ty_lhs->
category, ty_rhs->category);
931 const Boolean *ty_lhs = cast<const Boolean>(e.
lhs->type());
932 const Boolean *ty_rhs = cast<const Boolean>(e.
rhs->type());
935 if (not ty_lhs or not ty_rhs) {
936 diag.
e(e.
op().
pos) <<
"Invalid expression " << e <<
", operands must be of boolean type.\n";
942 Type::category_t c = std::max(ty_lhs->
category, ty_rhs->category);
953 M_insist(is<SelectStmt>(*e.
query),
"nested statements are always select statements");
964 if (1 != inner_ctx.results.size()) {
965 diag.
e(e.
tok.
pos) <<
"Invalid expression:\n" << e <<
",\nnested statement must return a single column.\n";
969 M_insist(1 == inner_ctx.results.size());
970 Expr &
res = inner_ctx.results.begin()->second.expr();
972 if (not
res.type()->is_primitive()) {
973 diag.
e(e.
tok.
pos) <<
"Invalid expression:\n" << e <<
",\nnested statement must return a primitive value.\n";
977 auto *pt = as<const PrimitiveType>(
res.type_);
982 diag.
e(e.
tok.
pos) <<
"Nested statements are not allowed in this stage.\n";
995 <<
",\nnested statement must return a scalar value.\n";
1000 auto is_fn = is<FnApplicationExpr>(res);
1001 auto is_const =
res.is_constant();
1002 auto &
q = as<const SelectStmt>(*e.
query);
1005 if (not(is_const
and not
q.from)
and not(is_fn
and not
q.group_by)) {
1007 <<
",\nnested statement must return a single value.\n";
1029 bool has_vectorial =
false;
1030 bool has_scalar =
false;
1031 uint64_t const_counter = 0;
1033 unsigned result_counter = 0;
1037 auto &stmt = as<const SelectStmt>(Ctx.stmt);
1039 if (stmt.group_by) {
1041 auto &group_by = as<const GroupByClause>(*stmt.group_by);
1042 has_scalar = has_scalar or not group_by.group_by.empty();
1043 for (
auto &[expr, alias] : group_by.group_by) {
1044 std::unique_ptr<Designator> d;
1047 }
else if (
auto D = cast<const ast::Designator>(
expr.get())) {
1050 std::ostringstream
oss;
1054 if (
auto ty = cast<const PrimitiveType>(d->type()))
1055 d->type_ = ty->as_scalar();
1057 M_insist(d->type()->is_error(),
"grouping key must be of primitive type");
1058 auto attr_name = d->attr_name.text.assert_not_none();
1061 Ctx.results.emplace(std::move(attr_name), SemaContext::result_t(*ref, result_counter++, alias.text));
1063 }
else if (stmt.having) {
1066 diag.
w(c.
select_all.
pos) <<
"The '*' has no meaning in this query. Did you forget the GROUP BY clause?.\n";
1069 for (
auto &[src_name, src] : Ctx.sources) {
1070 if (
auto ref = std::get_if<std::reference_wrapper<const Table>>(&src.first)) {
1072 auto &tbl = ref->get();
1073 for (
auto &attr : tbl) {
1083 Ctx.results.emplace(attr.name, SemaContext::result_t(*ref, result_counter++));
1085 has_vectorial =
true;
1088 auto &named_exprs = std::get<SemaContext::named_expr_table>(src.first);
1089 std::vector<std::unique_ptr<Expr>> expanded_select_all(named_exprs.size());
1090 for (
auto &[name, expr_w_pos] : named_exprs) {
1091 auto &[
expr, pos] = expr_w_pos;
1099 auto &ref = (expanded_select_all[pos] = std::move(d));
1101 if (
auto pt = cast<const PrimitiveType>(ref->type())) {
1102 has_scalar = has_scalar or pt->
is_scalar();
1105 M_insist(ref->type()->is_error(),
"result of nested query must be of primitive type");
1107 Ctx.results.emplace(name, SemaContext::result_t(*ref, result_counter + pos));
1109 result_counter += named_exprs.size();
1116 for (
auto it = c.
select.begin(), end = c.
select.end(); it != end; ++it) {
1117 auto &select_expr = *it->first;
1118 auto alias = it->second;
1120 (*this)(select_expr);
1121 if (select_expr.contains_free_variables()
and not is<QueryExpr>(select_expr))
1122 diag.
e(select_expr.tok.pos) << select_expr <<
" contains free variables (not yet supported).\n";
1124 if (select_expr.type()->is_error())
continue;
1127 if (
auto pt = cast<const PrimitiveType>(select_expr.type()); Ctx.needs_grouping
and pt
and pt->
is_vectorial()) {
1128 diag.
e(select_expr.tok.pos) << select_expr <<
" is not scalar.\n";
1134 if (not select_expr.is_constant()
and not is<QueryExpr>(select_expr)) {
1135 auto pt = as<const PrimitiveType>(select_expr.type());
1137 has_scalar = has_scalar or pt->
is_scalar();
1142 Ctx.results.emplace(alias.text, SemaContext::result_t(select_expr, result_counter++, alias.text));
1143 auto pred = [&](
const std::pair<std::unique_ptr<Expr>,
Token> &sel) {
1144 return sel.second.text == alias.text;
1146 if (
auto num = std::count_if(c.
select.begin(), it, pred)) {
1150 oss << alias.text <<
"$" << num;
1151 alias.text = C.
pool(
oss.str().c_str());
1153 }
else if (
auto d = cast<Designator>(&select_expr)) {
1155 Ctx.results.emplace(d->attr_name.text, SemaContext::result_t(*d, result_counter++));
1157 M_insist(not is<Designator>(select_expr));
1161 if (select_expr.is_constant())
1162 oss <<
"$const" << const_counter++;
1165 Ctx.results.emplace(C.
pool(
oss.str().c_str()), SemaContext::result_t(select_expr, result_counter++));
1169 if (has_vectorial
and has_scalar)
1170 diag.
e(c.
tok.
pos) <<
"SELECT clause with mixed scalar and vectorial values is forbidden.\n";
1182 unsigned source_counter = 0;
1186 for (
auto &src: c.
from) {
1187 if (
auto name = std::get_if<Token>(&src.source)) {
1189 const Table &
T = DB.get_table(name->text.assert_not_none());
1190 Token table_name = src.alias ? src.alias : *name;
1191 auto res = Ctx.sources.emplace(table_name.
text, std::make_pair(std::ref(T), source_counter++));
1194 for (std::size_t i = 0; i <
contexts_.size() - 1; ++i) {
1201 if (not
res.second or not unique)
1202 diag.
e(table_name.
pos) <<
"Table name " << table_name.
text <<
" already in use.\n";
1204 }
catch (std::out_of_range) {
1205 diag.
e(name->pos) <<
"No table " << name->text <<
" in database " << DB.name <<
".\n";
1208 }
else if (
auto stmt = std::get_if<Stmt*>(&src.source)) {
1209 M_insist(is<SelectStmt>(*stmt),
"nested statements are always select statements");
1212 push_context(**stmt, src.alias.text);
1218 for (
auto &[name, res] : inner_ctx.results)
1219 results.emplace(name, std::make_pair(std::ref(
res.expr()),
res.order));
1222 auto res = Ctx.sources.emplace(src.alias.text, std::make_pair(std::move(results), source_counter++));
1224 for (
auto &[_, result] : inner_ctx.results)
1225 result.expr().type_ = as<const PrimitiveType>(result.expr().type())->as_vectorial();
1228 for (std::size_t i = 0; i <
contexts_.size() - 1; ++i) {
1230 if (
contexts_[i]->sources.contains(src.alias.text.assert_not_none())) {
1235 if (not
res.second or not unique) {
1236 diag.
e(src.alias.pos) <<
"Table name " << src.alias.text <<
" already in use.\n";
1253 if (c.
where->type()->is_error())
1256 const Boolean *ty = cast<const Boolean>(c.
where->type());
1260 diag.
e(c.
tok.
pos) <<
"The expression in the WHERE clause must be of boolean type.\n";
1271 Ctx.needs_grouping =
true;
1272 for (
auto &[expr, alias] : c.
group_by) {
1276 if (
expr->type()->is_error())
1279 if (
expr->contains_free_variables())
1280 diag.
e(
expr->tok.pos) << *
expr <<
" contains free variable(s) (not yet supported).\n";
1286 diag.
e(c.
tok.
pos) <<
"Cannot group by " << *
expr <<
", has invalid type.\n";
1291 if (not pt->is_vectorial()) {
1292 diag.
e(c.
tok.
pos) <<
"Cannot group by " << *
expr <<
". Expressions in the GROUP BY clause must be "
1293 "vectorial, i.e. they must depend on each row separately.\n";
1299 Ctx.grouping_keys.emplace(alias.text, *expr);
1300 }
else if (
auto d = cast<Designator>(
expr.get())) {
1301 Ctx.grouping_keys.emplace(d->attr_name.text, *expr);
1305 Ctx.grouping_keys.emplace(C.
pool(
oss.str().c_str()), *expr);
1314 Ctx.needs_grouping =
true;
1319 if (c.
having->type()->is_error())
1326 diag.
e(c.
tok.
pos) <<
"The expression in the HAVING clause must be of boolean type.\n";
1330 if (not ty->is_scalar()) {
1331 diag.
e(c.
tok.
pos) <<
"The expression in the HAVING clause must be scalar.\n";
1348 if (e->type()->is_error())
continue;
1349 if (e->contains_free_variables())
1350 diag.
e(e->tok.pos) << *e <<
" contains free variable(s) (not yet supported).\n";
1352 auto pt = as<const PrimitiveType>(e->type());
1354 if (Ctx.needs_grouping) {
1356 if (pt->is_vectorial())
1357 diag.
e(c.
tok.
pos) <<
"Cannot order by " << *e <<
", expression must be scalar.\n";
1360 if (pt->is_scalar())
1361 diag.
e(c.
tok.
pos) <<
"Cannot order by " << *e <<
", expression must be vectorial.\n";
1375 if (errno == EINVAL)
1377 else if (errno == ERANGE)
1379 else if (errno != 0)
1385 if (errno == EINVAL)
1387 else if (errno == ERANGE)
1389 else if (errno != 0)
1401 }
catch (std::invalid_argument) {
1416 command_ = std::make_unique<EmptyCommand>();
1421 RequireContext RCtx(
this, s);
1426 command_ = std::make_unique<CreateDatabase>(std::move(db_name));
1433 RequireContext RCtx(
this, s);
1445 command_ = std::make_unique<DropDatabase>(std::move(db_name));
1448 command_ = std::make_unique<EmptyCommand>();
1456 RequireContext RCtx(
this, s);
1461 command_ = std::make_unique<UseDatabase>(std::move(db_name));
1468 RequireContext RCtx(
this, s);
1472 diag.
err() <<
"No database selected.\n";
1486 DB.get_table(table_name);
1487 diag.
e(s.
table_name.
pos) <<
"Table " << table_name <<
" already exists in database " << DB.name <<
".\n";
1488 }
catch (std::out_of_range) {
1493 bool has_primary_key =
false;
1496 const PrimitiveType *ty = cast<const PrimitiveType>(attr->type);
1498 diag.
e(attr->name.pos) <<
"Attribute " << attr->name.text <<
" cannot be defined with type " << *attr->type
1502 attr->type = ty->as_vectorial();
1507 T->push_back(attribute_name, ty->as_vectorial());
1508 }
catch (std::invalid_argument) {
1510 diag.
e(attr->name.pos) <<
"Attribute " << attr->name.text <<
" occurs multiple times in defintion of table "
1511 << table_name <<
".\n";
1515 bool has_reference =
false;
1516 bool is_unique =
false, is_not_null =
false;
1518 for (
auto &c : attr->constraints) {
1519 if (is<PrimaryKeyConstraint>(c)) {
1520 if (has_primary_key)
1521 diag.
e(attr->name.pos) <<
"Duplicate definition of primary key as attribute " << attr->name.text
1523 has_primary_key =
true;
1524 T->add_primary_key(attribute_name);
1527 if (is<UniqueConstraint>(c)) {
1529 diag.
w(c->
tok.
pos) <<
"Duplicate definition of attribute " << attr->name.text <<
" as UNIQUE.\n";
1531 T->at(attribute_name).unique =
true;
1534 if (is<NotNullConstraint>(c)) {
1536 diag.
w(c->
tok.
pos) <<
"Duplicate definition of attribute " << attr->name.text <<
" as NOT NULL.\n";
1538 T->at(attribute_name).not_nullable =
true;
1541 if (
auto check = cast<CheckConditionConstraint>(c)) {
1545 (*this)(*check->cond);
1546 auto ty = check->cond->type();
1547 if (not ty->is_boolean())
1548 diag.
e(check->tok.pos) <<
"Condition " << *check->cond <<
" is an invalid CHECK constraint.\n";
1551 if (
auto ref = cast<ReferenceConstraint>(c)) {
1553 diag.
e(ref->tok.pos) <<
"Attribute " << attr->name.text <<
" must not have multiple references.\n";
1554 has_reference =
true;
1558 auto &ref_table = DB.get_table(ref->table_name.text.assert_not_none());
1560 auto &ref_attr = ref_table.at(ref->attr_name.text.assert_not_none());
1561 if (attr->type != ref_attr.type)
1562 diag.
e(ref->attr_name.pos) <<
"Referenced attribute has different type.\n";
1563 T->at(attr->name.text.assert_not_none()).reference = &ref_attr;
1564 }
catch (std::out_of_range) {
1565 diag.
e(ref->attr_name.pos) <<
"Invalid reference, attribute " << ref->attr_name.text
1566 <<
" not found in table " << ref->table_name.text <<
".\n";
1568 }
catch (std::out_of_range) {
1569 diag.
e(ref->table_name.pos) <<
"Invalid reference, table " << ref->table_name.text
1577 command_ = std::make_unique<CreateTable>(std::move(T));
1582 RequireContext RCtx(
this, s);
1586 diag.
err() <<
"No database selected.\n";
1592 std::vector<ThreadSafePooledString> table_names;
1595 if (DB.has_table(table_name))
1596 table_names.emplace_back(std::move(table_name));
1599 diag.
e(tok->pos) <<
"Table " << table_name <<
" does not exist in database " << DB.name <<
".\n";
1602 diag.
n(tok->pos) <<
"Table " << table_name <<
" does not exist in database " << DB.name <<
". "
1608 command_ = std::make_unique<DropTable>(std::move(table_names));
1613 RequireContext RCtx(
this, s);
1617 diag.
err() <<
"No database selected.\n";
1630 diag.
err() <<
"Indexes without name not supported.\n";
1636 if (DB.has_index(index_name)) {
1638 diag.
w(s.
index_name.
pos) <<
"Index " << index_name <<
" already exists in database " << DB.name
1640 command_ = std::make_unique<EmptyCommand>();
1643 diag.
e(s.
index_name.
pos) <<
"Index " << index_name <<
" already exists in database " << DB.name <<
".\n";
1650 if (not DB.has_table(table_name)) {
1651 diag.
e(s.
table_name.
pos) <<
"Table " << table_name <<
" does not exist in database " << DB.name <<
"\n.";
1654 auto &table = DB.get_table(table_name);
1682 diag.
err() <<
"More than one key field for indexes not supported.\n";
1688 auto field = it->get();
1689 if (
auto d = cast<Designator>(field)) {
1690 if (not table.has_attribute(d->attr_name.text.assert_not_none())) {
1691 diag.
e(d->tok.pos) <<
"Attribute " << d->attr_name.text <<
" does not exists in table "
1692 << table_name <<
".\n";
1696 diag.
e(field->tok.pos) <<
"Non-attribute key fields for indexes not supported.\n";
1700 auto attribute_name = cast<Designator>(s.
key_fields.front())->attr_name.text.assert_not_none();
1701 auto &attribute = table.at(attribute_name);
1704 std::unique_ptr<idx::IndexBase> index;
1705 auto make_index = [&]<
template<
typename>
typename Index,
typename Key>() {
1706 if constexpr(
requires {
typename Index<Key>; }) {
1707 return std::make_unique<Index<Key>>();
1709 diag(s.
method.
pos) <<
"Index method not available for given key type.\n";
1713 auto set_index = [&]<
template<
typename>
typename Index>() {
1715 [&](
const Boolean&) { index = make_index.operator()<Index,
bool>(); },
1718 case Numeric::N_Int:
1719 case Numeric::N_Decimal:
1722 case 8: index = make_index.operator()<Index, int8_t>();
break;
1723 case 16: index = make_index.operator()<Index, int16_t>();
break;
1724 case 32: index = make_index.operator()<Index, int32_t>();
break;
1725 case 64: index = make_index.operator()<Index, int64_t>();
break;
1728 case Numeric::N_Float:
1731 case 32: index = make_index.operator()<Index,
float>();
break;
1732 case 64: index = make_index.operator()<Index,
double>();
break;
1736 [&](
const CharacterSequence&) { index = make_index.operator()<Index,
const char*>(); },
1737 [&](
const Date&) { index = std::make_unique<Index<int32_t>>(); },
1738 [&](
const DateTime&) { index = std::make_unique<Index<int64_t>>(); },
1740 }, *attribute.type);
1756 command_ = std::make_unique<CreateIndex>(std::move(index), std::move(table_name), std::move(attribute_name),
1757 std::move(index_name));
1762 RequireContext RCtx(
this, s);
1766 diag.
err() <<
"No database selected.\n";
1772 std::vector<ThreadSafePooledString> index_names;
1774 auto index_name = tok->text.assert_not_none();
1775 if (DB.has_index(index_name))
1776 index_names.emplace_back(index_name);
1779 diag.
e(tok->pos) <<
"Index " << index_name <<
" does not exist in database " << DB.name <<
".\n";
1782 diag.
w(tok->pos) <<
"Index " << index_name <<
" does not exist in database " << DB.name <<
". "
1788 command_ = std::make_unique<DropIndex>(std::move(index_names));
1793 RequireContext RCtx(
this, s);
1798 diag.
err() <<
"No database selected.\n";
1812 command_ = std::make_unique<QueryDatabase>();
1817 RequireContext RCtx(
this, s);
1829 }
catch (std::out_of_range) {
1835 for (std::size_t i = 0; i != s.
tuples.size(); ++i) {
1843 for (
auto [it, j] = std::tuple{tbl->
begin(), 0}; it != tbl->
end(); ++it, ++j) {
1849 if (v.second->type()->is_error())
continue;
1850 auto ty = as<const PrimitiveType>(v.second->type());
1851 if (ty->is_boolean()
and attr.type->is_boolean())
1853 if (ty->is_character_sequence()
and attr.type->is_character_sequence())
1855 if (ty->is_date()
and attr.type->is_date())
1857 if (ty->is_date_time()
and attr.type->is_date_time())
1859 if (ty->is_numeric()
and attr.type->is_numeric())
1862 << attr.name <<
".\n";
1867 if (attr.not_nullable)
1869 <<
" declared as NOT NULL.\n";
1881 command_ = std::make_unique<InsertRecords>();
1886 RequireContext RCtx(
this, s);
1894 RequireContext RCtx(
this, s);
1902 RequireContext RCtx(
this, s);
1911 const Table *table =
nullptr;
1914 }
catch (std::out_of_range) {
1925#define SET_CHAR(NAME) \
1927 std::string NAME = interpret(*s.NAME.text); \
1928 if (NAME.length() == 1) \
1929 cfg.NAME = NAME[0]; \
1931 diag.e(s.NAME.pos) << "Invalid " #NAME " character " << s.NAME.text << ". Must have length 1.\n"; \
1941 diag.
e(pos) <<
"The delimiter (" << cfg.
delimiter <<
") must differ from the quote character (" << cfg.
quote
1948 diag.
n(s.
path.
pos) <<
"I will assume the existence of a header so I can skip it.\n";
1952 std::filesystem::path path(std::string(*s.
path.
text, 1, strlen(*s.
path.
text) - 2));
1955 command_ = std::make_unique<ImportDSV>(*table, path, std::move(cfg));
#define M_unreachable(MSG)
::wasm::Expression * expr()
Moves the underlying Binaryen ::wasm::Expression out of this.
for(std::size_t idx=1;idx< num_vectors;++idx) res.emplace((vectors_[idx].bitmask()<< uint32_t(idx *vector_type return * res
std::string unique(std::string prefix, unsigned &counter)
Creates a unique name from a given prefix and a counter.
std::string escape(char c)
bool M_EXPORT is_comparable(const Type *first, const Type *second)
Returns true iff both types have the same PrimitiveType, i.e.
const Numeric * arithmetic_join(const Numeric *lhs, const Numeric *rhs)
std::string quote(const std::string &str)
ThreadSafeStringPool::proxy_type ThreadSafePooledString
and arithmetic< U > and same_signedness< T, U > U
ThreadSafeStringPool::proxy_optional_type ThreadSafePooledOptionalString
std::string interpret(const std::string &str, char esc='\\', char quote='"')
auto visit(Callable &&callable, Base &obj, m::tag< Callable > &&=m::tag< Callable >())
Generic implementation to visit a class hierarchy, with similar syntax as std::visit.
const PrimitiveType * type
the type of the attribute
The catalog contains all Databases and keeps track of all meta information of the database system.
Database & get_database_in_use()
Returns a reference to the Database that is currently in use, if any.
bool has_database_in_use() const
Returns true if any Database is currently in use.
ThreadSafePooledString pool(const char *str) const
Creates an internalized copy of the string str by adding it to the internal StringPool.
static Catalog & Get()
Return a reference to the single Catalog instance.
bool has_database(const ThreadSafePooledString &name) const
Returns true iff a Database with the given name exists.
std::unique_ptr< DatabaseInstruction > create_instruction(const ThreadSafePooledString &name, const std::vector< std::string > &args) const
Returns a reference to the DatabaseInstruction with the given name.
const Function * get_function(const ThreadSafePooledString &name) const
Returns a reference to the Function with the given name.
std::unique_ptr< TableFactory > table_factory(std::unique_ptr< TableFactory > table_factory)
Replaces the stored TableFactory with table_factory and returns the old TableFactory.
The type of character strings, both fixed length and varying length.
Configuration parameters for importing a DSV file.
char delimiter
the delimiter separating cells
char quote
the quotation mark for strings
bool skip_header
whether to ignore the headline (requires has_header = true)
bool has_header
whether the first line of the file is a headline describing the columns
std::size_t num_rows
the maximum number of rows to read from the file (may exceed actual number of rows)
ThreadSafePooledString name
the name of the database
std::ostream & e(const Position pos)
std::ostream & n(const Position pos)
unsigned num_errors() const
Returns the number of errors emitted since the last call to clear().
std::ostream & w(const Position pos)
fnid_t fnid
the function id
bool is_aggregate() const
Returns true iff this function is an aggregation, i.e. if it is evaluated on all tuples.
The numeric type represents integer and floating-point types of different precision and scale.
static constexpr std::size_t MAX_DECIMAL_PRECISION
The maximal number of decimal digits that can be accurately represented by DECIMAL(p,...
unsigned scale
the number of decimal digits right of the decimal point
virtual const PrimitiveType * as_scalar() const override
Convert this PrimitiveType to its scalar equivalent.
static Options & Get()
Return a reference to the single Options instance.
A data type representing a pooled (or internalized) object.
Pooled< T, Pool, false > assert_not_none() const
PrimitiveTypes represent Types of values.
bool is_scalar() const
Returns true iff this PrimitiveType is scalar, i.e. if it is for a single value.
virtual const PrimitiveType * as_vectorial() const =0
Convert this PrimitiveType to its vectorial equivalent.
category_t category
whether this type is scalar or vector
bool is_vectorial() const
Returns true iff this PrimitiveType is vectorial, i.e. if it is for a sequence of values.
A table is a sorted set of attributes.
virtual iterator end() const =0
virtual Attribute & at(std::size_t id)=0
Returns the attribute with the given id.
virtual std::size_t num_attrs() const =0
Returns the number of attributes in this table.
virtual iterator begin() const =0
This class represents types in the SQL type system.
static Pooled< CharacterSequence > Get_Char(category_t category, std::size_t length)
Returns a CharacterSequence type of the given category and fixed length.
static Pooled< NoneType > Get_None()
Returns a NoneType.
static Pooled< Numeric > Get_Double(category_t category)
Returns a Numeric type of given category for 64 bit floating-points.
static M_LCOV_EXCL_STOP Pooled< ErrorType > Get_Error()
Returns an ErrorType.
bool is_primitive() const
Returns true iff this Type is a PrimitiveType.
static Pooled< Numeric > Get_Decimal(category_t category, unsigned digits, unsigned scale)
Returns a Numeric type for decimals of given category, decimal digits, and scale.
static Pooled< Date > Get_Date(category_t category)
Returns a Date type of the given category.
static Pooled< Boolean > Get_Boolean(category_t category)
Returns a Boolean type of the given category.
static Pooled< DateTime > Get_Datetime(category_t category)
Returns a DateTime type of the given category.
static Pooled< Numeric > Get_Integer(category_t category, unsigned num_bytes)
Returns a Numeric type for integrals of given category and num_bytes bytes.
static Pooled< FnType > Get_Function(const Type *return_type, std::vector< const Type * > parameter_types)
Returns a FnType for a function with parameter types parameter_types and return type return_type.
std::unique_ptr< Expr > lhs
const Numeric * common_operand_type
std::unique_ptr< Expr > rhs
A constant: a string literal or a numeric constant.
std::vector< std::unique_ptr< Expr > > key_fields
std::vector< std::unique_ptr< attribute_definition > > attributes
An import statement for a delimiter separated values (DSV) file.
std::variant< std::monostate, const Expr *, const Attribute * > target_type
std::vector< std::unique_ptr< Token > > index_names
std::vector< std::unique_ptr< Token > > table_names
const Type * type_
the type of an expression, determined by the semantic analysis
const Type * type() const
Returns the Type of this Expr.
Token tok
the token of the expression; serves as an anchor to locate the expression in the source
std::vector< std::unique_ptr< Expr > > args
std::unique_ptr< Expr > fn
std::vector< from_type > from
std::vector< group_type > group_by
std::unique_ptr< Expr > having
std::vector< tuple_t > tuples
Token tok
the token of the Instruction; starts with \
ThreadSafePooledString name
the name of the Instruction (without leading \)
std::vector< std::string > args
the arguments to the Instruction; may be empty
std::vector< order_type > order_by
true means ascending, false means descending
A query expression for nested queries.
std::unique_ptr< Stmt > query
const ThreadSafePooledString & alias() const
std::vector< select_type > select
list of selected elements; expr AS name
std::vector< std::unique_ptr< Expr > > expanded_select_all
std::unique_ptr< Clause > from
std::unique_ptr< Clause > select
std::unique_ptr< Clause > where
std::unique_ptr< Clause > order_by
std::unique_ptr< Clause > having
std::unique_ptr< Clause > group_by
std::unique_ptr< Clause > limit
source_table sources
list of all sources along with their order
std::unordered_multimap< ThreadSafePooledString, std::pair< std::reference_wrapper< Expr >, unsigned > > named_expr_table
list of all computed expressions along with their order
std::variant< std::monostate, std::reference_wrapper< const Table >, named_expr_table > source_type
the type of a source of data: either a database table or a nested query with named results
enum m::ast::Sema::SemaContext::stage_t stage
current stage
std::ostringstream oss
used to create textual representation of complex AST objects, e.g. expressions
ThreadSafePooledOptionalString make_unique_id_from_binding_path(context_stack_t::reverse_iterator current_ctx, context_stack_t::reverse_iterator binding_ctx)
Creates a unique ID from a sequence of SemaContexts by concatenating their aliases.
context_stack_t contexts_
void replace_by_fresh_designator_to(std::unique_ptr< Expr > &to_replace, const Expr &target)
Replaces to_replace by a fresh Designator, that has the same syntactical representation as to_replace...
SemaContext pop_context()
SemaContext & get_context()
std::unique_ptr< DatabaseCommand > command_
the command to execute when semantic analysis completes without errors
bool is_nested() const
Returns true iff the current statement, that is being analyzed, is a nested statement.
std::unique_ptr< Designator > create_designator(ThreadSafePooledString name, Token tok, const Expr &target)
Creates a fresh Designator with the given name at location tok and with target target.
void compose_of(std::unique_ptr< ast::Expr > &ptr, const std::vector< std::reference_wrapper< ast::Expr > > components)
Recursively analyzes the ast::Expr referenced by ptr and replaces subexpressions that can be composed...
std::unique_ptr< DatabaseCommand > analyze(std::unique_ptr< ast::Command > ast)
Perform semantic analysis of an ast::Command.
bool is_composable_of(const ast::Expr &expr, const std::vector< std::reference_wrapper< ast::Expr > > components)
Computes whether the bound parts of expr are composable of elements in components.
ThreadSafePooledOptionalString text
declared as optional for dummy tokens
static Token CreateArtificial(TokenType type=TK_EOF)
A unary expression: "+e", "-e", "~e", "NOT e".
std::unique_ptr< Expr > expr
std::unique_ptr< Expr > where
A simple index based on a sorted array that maps keys to their tuple_id.
A recursive model index with two layers consisting only of linear models that maps keys to their tuple...