mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
Sema.cpp
Go to the documentation of this file.
1#include "parse/Sema.hpp"
2
3#include <cstdint>
6#include <mutable/Options.hpp>
7#include <sstream>
8#include <unordered_map>
9
10
11using namespace m;
12using namespace m::ast;
13
14
15std::unique_ptr<DatabaseCommand> Sema::analyze(std::unique_ptr<ast::Command> ast)
16{
17 (*this)(*ast); // perform semantic analysis
18 if (command_)
19 command_->ast(std::move(ast)); // move AST into DatabaseCommand instance
20 return std::move(command_);
21}
22
23bool Sema::is_nested() const
24{
25 return contexts_.size() > 1;
26}
27
28/*----------------------------------------------------------------------------------------------------------------------
29 * Sema Designator Helpers
30 *--------------------------------------------------------------------------------------------------------------------*/
31
32std::unique_ptr<Designator> Sema::create_designator(ThreadSafePooledString name, Token tok, const Expr &target)
33{
34 auto new_designator = std::make_unique<Designator>(tok, Token::CreateArtificial(),
35 Token(tok.pos, std::move(name), TK_IDENTIFIER));
36 new_designator->type_ = target.type();
37 new_designator->target_ = &target;
38 return new_designator;
39}
40
41std::unique_ptr<Designator> Sema::create_designator(const Expr &name, const Expr &target, bool drop_table_name)
42{
43 auto &C = Catalog::Get();
44
45 std::unique_ptr<Designator> new_designator;
46 if (auto d = cast<const Designator>(&name)) {
47 Token table_name = drop_table_name ? Token::CreateArtificial() : d->table_name; // possibly drop table name
48 new_designator = std::make_unique<Designator>(d->tok, std::move(table_name), d->attr_name); // copy of `name`
49 } else {
50 oss.str("");
51 oss << name; // stringify `name`
52 Token tok(target.tok.pos, C.pool(oss.str().c_str()), TK_DEC_INT); // fresh identifier
53 new_designator = std::make_unique<Designator>(std::move(tok));
54 }
55
56 new_designator->type_ = target.type();
57 new_designator->target_ = &target;
58 return new_designator;
59}
60
61void Sema::replace_by_fresh_designator_to(std::unique_ptr<Expr> &to_replace, const Expr &target)
62{
63 auto new_designator = create_designator(*to_replace, target);
64 to_replace = std::move(new_designator);
65}
66
67
68/*----------------------------------------------------------------------------------------------------------------------
69 * Other Sema Helpers
70 *--------------------------------------------------------------------------------------------------------------------*/
71
72ThreadSafePooledOptionalString Sema::make_unique_id_from_binding_path(context_stack_t::reverse_iterator current_ctx,
73 context_stack_t::reverse_iterator binding_ctx)
74{
75 if (current_ctx == binding_ctx) return {};
76
77 oss.str("");
78 for (auto it = current_ctx; it != binding_ctx; ++it) {
79 if (it != current_ctx) oss << '.';
80 M_insist((*it)->alias.has_value(), "nested queries must have an alias");
81 oss << (*it)->alias;
82 }
83
84 auto &C = Catalog::Get();
85 return C.pool(oss.str().c_str());
86}
87
89 const std::vector<std::reference_wrapper<ast::Expr>> components)
90{
 /* Decides whether `expr` can be composed of the given `components`.  The result is `true` if `expr` compares equal
  * to one of the components; otherwise it is whatever the `recurse` visitor below computes for `expr`.
  * NOTE(review): the first line of this definition's signature (embedded line 88) is not part of this listing; the
  * calls below show the function being invoked as `is_composable_of(<expr>, components)` with a boolean result. */

 /* Per-node rules applied when `expr` itself is not one of the components:
  *  - designator: composable if it contains free variables or is an identifier,
  *  - function application: callee and every argument must be composable,
  *  - unary expression: the operand must be composable,
  *  - binary expression: both operands must be composable,
  *  - any other expression: composable. */
91 auto recurse = overloaded {
92 [&](const ast::Designator &d) -> bool {
93 return d.contains_free_variables() or d.is_identifier(); // identifiers are implicitly composable, as they never refer into a table XXX do we have to check the target?
94 },
95 [&](const ast::FnApplicationExpr &e) -> bool {
96 if (not is_composable_of(*e.fn, components)) return false;
97 for (auto &arg : e.args) {
98 if (not is_composable_of(*arg, components))
99 return false;
100 }
101 return true;
102 },
103 [&](const ast::UnaryExpr &e) -> bool { return is_composable_of(*e.expr, components); },
104 [&](const ast::BinaryExpr &e) -> bool {
105 return is_composable_of(*e.lhs, components) and is_composable_of(*e.rhs, components);
106 },
107 [](auto&) -> bool { return true; },
108 };
109
 /* First try a direct match against each component, then fall back to the structural rules above. */
110 for (auto c : components)
111 if (expr == c.get()) return true; // syntactically equivalent to a component
112 return visit(recurse, expr, m::tag<m::ast::ConstASTExprVisitor>()); // attempt to recursively compose expr
113}
114
115void Sema::compose_of(std::unique_ptr<ast::Expr> &ptr, const std::vector<std::reference_wrapper<ast::Expr>> components)
116{
117 auto recurse = overloaded {
118 [&](const ast::Designator &d) -> bool {
119 return d.contains_free_variables() or d.is_identifier(); // identifiers are implicitly composable, as they never refer into a table XXX do we have to check the target?
120 },
121 [&](const ast::FnApplicationExpr &e) -> bool {
122 if (not is_composable_of(*e.fn, components)) return false;
123 for (auto &arg : e.args) {
124 if (not is_composable_of(*arg, components))
125 return false;
126 }
127 return true;
128 },
129 [&](const ast::UnaryExpr &e) -> bool { return is_composable_of(*e.expr, components); },
130 [&](const ast::BinaryExpr &e) -> bool {
131 return is_composable_of(*e.lhs, components) and is_composable_of(*e.rhs, components);
132 },
133 [](auto&) -> bool { return true; },
134 };
135
136 for (auto c : components) {
137 if (*ptr == c.get())
138 replace_by_fresh_designator_to(/* to_replace= */ ptr, /* target= */ c.get());
139 }
140 visit(recurse, *ptr, m::tag<m::ast::ConstASTExprVisitor>()); // attempt to recursively compose expr
141}
142
143/*===== Expr =========================================================================================================*/
144
/* Analyzes an erroneous expression.
 * NOTE(review): the single statement of this body (embedded line 147) is not part of this listing; presumably it
 * assigns the error type to `e`, as the other visitors do on their error paths -- confirm against the original file. */
145void Sema::operator()(ErrorExpr &e)
146{
148}
149
/* Resolves the designator `e`.  On success, `e.target_` and `e.type_` are set (and, depending on the path taken, also
 * the table name, the binding depth, and the unique ID).  On failure, a diagnostic is emitted and `e.type_` is set to
 * the error type.  Resolution is attempted in this order:
 *  1. in a stage after SELECT: the stringified designator is looked up among the results of the current context,
 *  2. in a stage after GROUP BY: the stringified designator is looked up among the grouping keys,
 *  3. otherwise: the sources of all contexts are searched, innermost context first.
 * NOTE(review): several lines of this definition (embedded lines 203, 225, 281-282, 378, 384-388) are not part of this
 * listing; see the notes at the respective places. */
150void Sema::operator()(Designator &e)
151{
152 Catalog &C = Catalog::Get();
153 SemaContext *current_ctx = &get_context();
154 auto attr_name = e.attr_name.text.assert_not_none();
155
 /* Stringify the entire designator; the pooled string is the lookup key for results and grouping keys. */
156 oss.str("");
157 oss << e;
158 auto pooled_name = C.pool(oss.str().c_str());
159
160 /*----- In a stage after SELECT, check whether the `Designator` refers to a value produced by SELECT. -----*/
161 if (current_ctx->stage > SemaContext::S_Select) {
162 auto [begin, end] = current_ctx->results.equal_range(pooled_name);
163 if (std::distance(begin, end) > 1) {
164 diag.e(e.tok.pos) << "Designator " << e << " is ambiguous, multiple occurrences in SELECT clause.\n";
165 e.type_ = Type::Get_Error();
166 return;
167 } else if (std::distance(begin, end) == 1) {
168 SemaContext::result_t &result = begin->second;
169 if (auto d = cast<Designator>(&result.expr()); d and not result.alias.has_value()) // target is a designator
170 e.table_name.text = d->table_name.text; // w/o explicit alias
171 e.type_ = result.expr().type();
172 e.target_ = &result.expr();
173 return;
174 }
175 }
176
177 /*----- In a stage after GROUP BY, check whether the entire expression refers to a grouping key. -----*/
178 if (current_ctx->stage > SemaContext::S_GroupBy and not current_ctx->grouping_keys.empty()) {
179 auto [begin, end] = current_ctx->grouping_keys.equal_range(pooled_name);
180 if (std::distance(begin, end) > 1) {
181 diag.e(e.tok.pos) << "Designator " << e << " is ambiguous, multiple occurrences in GROUP BY clause.\n";
182 e.type_ = Type::Get_Error();
183 return;
184 } else if (std::distance(begin, end) == 1) {
185 auto &referenced_expr = begin->second.get();
 /* A reference to a grouping key takes the scalar variant of the key's type. */
186 e.type_ = referenced_expr.type();
187 if (auto pt = cast<const PrimitiveType>(e.type()))
188 e.type_ = pt->as_scalar();
189 else
190 M_insist(e.type()->is_error(), "grouping expression must be of primitive type");
191 e.target_ = &referenced_expr;
192 return;
193 }
194 }
195
196 /*----- Designator was neither a reference to a SELECT or GROUP BY expression. -----*/
197 decltype(contexts_)::reverse_iterator found_ctx; // the context where the designator is found
198 bool is_result = false;
199
200 /* If the designator references an attribute of a table, search for it. */
201 if (e.table_name) {
202 /* Find the source table first and then locate the target inside this table. */
 /* NOTE(review): the declaration of `src` (embedded line 203) is not part of this listing. */
204
205 /* Search all contexts, starting with the innermost and advancing outwards. */
206 auto it = contexts_.rbegin();
207 for (auto end = contexts_.rend(); it != end; ++it) {
208 try {
209 src = (*it)->sources.at(e.table_name.text.assert_not_none()).first;
210 break;
211 } catch (std::out_of_range) {
212 /* The source is not found in this context so iterate over the entire stack. */
213 }
214 }
215
216 if (it == contexts_.rend()) {
217 diag.e(e.table_name.pos) << "Source table " << e.table_name.text
218 << " not found. Maybe you forgot to specify it in the FROM clause?\n";
219 e.type_ = Type::Get_Error();
220 return;
221 }
222 found_ctx = it;
223
224 /* Find the target inside the source table. */
 /* NOTE(review): the declaration of `target` (embedded line 225) is not part of this listing. */
226 if (auto ref = std::get_if<std::reference_wrapper<const Table>>(&src)) {
227 const Table &tbl = ref->get();
228 /* Find the attribute inside the table. */
229 try {
230 target = &tbl.at(attr_name); // we found an attribute of that name in the source tables
231 } catch (std::out_of_range) {
232 diag.e(e.attr_name.pos) << "Table " << e.table_name.text << " has no attribute " << attr_name << ".\n";
233 e.type_ = Type::Get_Error();
234 return;
235 }
236 } else if (auto T = std::get_if<SemaContext::named_expr_table>(&src)) {
237 const SemaContext::named_expr_table &tbl = *T;
238 /* Find expression inside named expression table. */
239 auto [begin, end] = tbl.equal_range(attr_name);
240 if (begin == end) {
241 diag.e(e.attr_name.pos) << "Source " << e.table_name.text << " has no attribute " << attr_name << ".\n";
242 e.type_ = Type::Get_Error();
243 return;
244 } else if (std::distance(begin, end) > 1) {
245 diag.e(e.attr_name.pos) << "Source " << e.table_name.text << " has multiple attributes " << attr_name
246 << ".\n";
247 e.type_ = Type::Get_Error();
248 return;
249 } else {
250 target = &begin->second.first.get();
251 }
252 } else {
253 M_unreachable("invalid variant");
254 }
255 e.target_ = target;
256 e.set_binding_depth(std::distance(contexts_.rbegin(), found_ctx));
257 e.unique_id_ = make_unique_id_from_binding_path(contexts_.rbegin(), found_ctx);
258 } else {
259 /* No table name was specified. The designator references either a result or a named expression. Search the
260 * named expressions first, because they overrule attribute names. */
261 if (auto [begin, end] = current_ctx->results.equal_range(attr_name);
262 current_ctx->stage > SemaContext::S_Select and std::distance(begin, end) >= 1)
263 {
264 /* Found a named expression. */
265 if (std::distance(begin, end) > 1) {
266 diag.e(e.attr_name.pos) << "Attribute specifier " << attr_name << " is ambiguous.\n";
267 e.type_ = Type::Get_Error();
268 return;
269 } else {
270 M_insist(std::distance(begin, end) == 1);
271 SemaContext::result_t &result = begin->second;
272 e.target_ = &result.expr();
273 if (auto d = cast<Designator>(&result.expr()); d and d->attr_name.text == attr_name)
274 e.table_name.text = d->table_name.text;
275 e.set_binding_depth(0); // bound by the current (innermost) context ⇒ bound variable
276 is_result = true;
277 found_ctx = contexts_.rbegin(); // iterator to the current context
278 }
279 } else {
280 /* Since no table was explicitly specified, we must search *all* sources for the attribute. */
 /* NOTE(review): the declarations of `target` and `alias` (presumably embedded lines 281-282) are not part of
  * this listing.  The checks below rely on `target` initially holding `std::monostate`. */
283
284 /* Search all contexts, starting with the innermost and advancing outwards. */
285 for (auto it = contexts_.rbegin(), end = contexts_.rend(); it != end; ++it) {
286 for (auto &src : (*it)->sources) {
287 if (auto ref = std::get_if<std::reference_wrapper<const Table>>(&src.second.first)) {
288 const Table &tbl = ref->get();
289 try {
290 const Attribute &A = tbl.at(attr_name);
291 if (not std::holds_alternative<std::monostate>(target)) {
292 /* ambiguous attribute name */
293 diag.e(e.attr_name.pos) << "Attribute specifier " << attr_name << " is ambiguous.\n";
294 // TODO print names of conflicting tables
295 e.type_ = Type::Get_Error();
296 return;
297 } else {
298 target = &A; // we found an attribute of that name in the source tables
299 alias = src.first;
300 found_ctx = it;
301 }
302 } catch (std::out_of_range) {
303 /* This source table has no attribute of that name. OK, continue. */
304 }
305 } else if (auto T = std::get_if<SemaContext::named_expr_table>(&src.second.first)) {
306 const SemaContext::named_expr_table &tbl = *T;
307 auto [begin, end] = tbl.equal_range(attr_name);
308 if (begin == end) {
309 /* This source table has no attribute of that name. OK, continue. */
310 } else if (std::distance(begin, end) > 1) {
311 diag.e(e.attr_name.pos) << "Attribute specifier " << attr_name << " is ambiguous.\n";
312 e.type_ = Type::Get_Error();
313 return;
314 } else {
315 M_insist(std::distance(begin, end) == 1);
316 if (not std::holds_alternative<std::monostate>(target)) {
317 /* ambiguous attribute name */
318 diag.e(e.attr_name.pos) << "Attribute specifier " << attr_name << " is ambiguous.\n";
319 // TODO print names of conflicting tables
320 e.type_ = Type::Get_Error();
321 return;
322 } else {
323 target = &begin->second.first.get(); // we found an attribute of that name in the source tables
324 alias = src.first;
325 found_ctx = it;
326 }
327 }
328 } else {
329 M_unreachable("invalid variant");
330 }
331 }
332 /* If we found target of the designator, abort searching contexts further outside. */
333 if (not std::holds_alternative<std::monostate>(target))
334 break;
335 }
336
337 /* If this designator could not be resolved, emit an error and abort further semantic analysis. */
338 if (std::holds_alternative<std::monostate>(target)) {
339 diag.e(e.attr_name.pos) << "Attribute " << attr_name << " not found.\n";
340 e.type_ = Type::Get_Error();
341 return;
342 }
343
344 e.target_ = target;
345 e.table_name.text = alias; // set the deduced table name of this designator
346 e.set_binding_depth(std::distance(contexts_.rbegin(), found_ctx));
347 e.unique_id_ = make_unique_id_from_binding_path(contexts_.rbegin(), found_ctx);
348 }
349 }
350
351 /* Compute the type of this designator based on the referenced source. */
352 M_insist(e.target_.index() != 0);
353 struct get_type {
354 const Type * operator()(std::monostate&) const { M_unreachable("target not set"); }
355 const Type * operator()(const Attribute *attr) const { return attr->type; }
356 const Type * operator()(const Expr *expr) const { return expr->type_; }
357 };
358 const PrimitiveType *pt = cast<const PrimitiveType>(std::visit(get_type(), e.target_));
359 e.type_ = pt;
360
 /* Unless the designator refers to a result of the current context, it takes the vectorial variant of the type.
  * NOTE(review): `pt` is dereferenced here without a null check; presumably the target is always of primitive type
  * on this path -- confirm. */
361 if (not is_result)
362 e.type_ = pt->as_vectorial();
363
364 /* Check if any context between current context and found context is in stage `S_FROM`. */
365 for (auto it = contexts_.rbegin(); it != found_ctx; ++it) {
366 if ((*it)->stage == SemaContext::S_From) {
367 /* The designator is correlated and occurs in a nested query in the FROM. Emit an error. */
368 diag.e(e.attr_name.pos) << "Correlated attributes are not allowed in the FROM clause.\n";
369 e.type_ = Type::Get_Error();
370 return;
371 }
372 }
373
 /* Final check depending on the stage of the context in which the designator was found.
  * NOTE(review): the `case` labels of this switch (embedded lines 378 and 384-388) are not part of this listing, so
  * it cannot be told from here which stages map to which of the two visible branches. */
374 switch ((*found_ctx)->stage) {
375 default:
376 M_unreachable("designator not allowed in this stage");
377
379 /* The designator is correlated and occurs in a nested query in the FROM. Emit an error. */
380 diag.e(e.attr_name.pos) << "Correlated attributes are not allowed in the FROM clause.\n";
381 e.type_ = Type::Get_Error();
382 return;
383
389 /* The type of the attribute remains unchanged. Nothing to be done. */
390 break;
391 }
392}
393
/* Determines the type of the constant `e` from the type of its token.  Date and datetime literals are additionally
 * validated field by field; an invalid field is reported through `diag` and analysis of the constant stops.
 * NOTE(review): one line is missing from this listing in front of every `return` on the error paths (e.g. embedded
 * lines 414, 419, 427); presumably each assigns the error type to `e` -- confirm against the original file. */
394void Sema::operator()(Constant &e)
395{
 /* Radix for integer literals.  It starts at 8 (octal); the integer cases below fall through on purpose and raise
  * it to 10 for decimal (+2) and to 16 for hexadecimal (+6, then +2). */
396 int base = 8; // for integers
397 switch (e.tok.type) {
398 default:
399 M_unreachable("a constant must be one of the types below");
400
401 case TK_Null:
402 e.type_ = Type::Get_None();
403 break;
404
405 case TK_STRING_LITERAL:
 /* The character sequence type is sized by the length of the interpreted literal. */
406 e.type_ = Type::Get_Char(Type::TY_Scalar, interpret(*e.tok.text).length());
407 break;
408
409 case TK_DATE: {
 /* NOTE(review): the result of `sscanf` is not checked and the fields are not initialized; presumably the
  * token is guaranteed to match the format -- confirm. */
410 int year, month, day;
411 sscanf(*e.tok.text, "d'%d-%d-%d'", &year, &month, &day);
412 if (year == 0) {
413 diag.e(e.tok.pos) << e << " has invalid year (after year -1 (1 BC) follows year 1 (1 AD)).\n";
415 return;
416 }
417 if (month < 1 or month > 12) {
418 diag.e(e.tok.pos) << e << " has invalid month.\n";
420 return;
421 }
 /* Upper bound of the day depends on the month; February accepts up to 29 regardless of the year. */
422 if (day < 1 or (month == 2 and day > 29)
423 or ((month == 4 or month == 6 or month == 9 or month == 11) and day > 30)
424 or ((month == 1 or month == 3 or month == 5 or month == 7 or month == 8 or month == 10 or month == 12)
425 and day > 31)) {
426 diag.e(e.tok.pos) << e << " has invalid day.\n";
428 return;
429 }
430 e.type_ = Type::Get_Date(Type::TY_Scalar);
431 break;
432 }
433
434 case TK_DATE_TIME: {
 /* Same validation as for dates, followed by range checks of hour, minute, and second.
  * NOTE(review): as above, the result of `sscanf` is not checked -- confirm the token always matches. */
435 int year, month, day, hour, minute, second;
436 sscanf(*e.tok.text, "d'%d-%d-%d %d:%d:%d'", &year, &month, &day, &hour, &minute, &second);
437 if (year == 0) {
438 diag.e(e.tok.pos) << e << " has invalid year (after year -1 (1 BC) follows year 1 (1 AD)).\n";
440 return;
441 }
442 if (month < 1 or month > 12) {
443 diag.e(e.tok.pos) << e << " has invalid month.\n";
445 return;
446 }
447 if (day < 1 or (month == 2 and day > 29)
448 or ((month == 4 or month == 6 or month == 9 or month == 11) and day > 30)
449 or ((month == 1 or month == 3 or month == 5 or month == 7 or month == 8 or month == 10 or month == 12)
450 and day > 31)) {
451 diag.e(e.tok.pos) << e << " has invalid day.\n";
453 return;
454 }
455 M_insist(hour >= 0);
456 if (hour > 23) {
457 diag.e(e.tok.pos) << e << " has invalid hour.\n";
459 return;
460 }
461 M_insist(minute >= 0);
462 if (minute > 59) {
463 diag.e(e.tok.pos) << e << " has invalid minute.\n";
465 return;
466 }
467 M_insist(second >= 0);
468 if (second > 59) {
469 diag.e(e.tok.pos) << e << " has invalid second.\n";
471 return;
472 }
473 e.type_ = Type::Get_Datetime(Type::TY_Scalar);
474 break;
475 }
476
477 case TK_True:
478 case TK_False:
479 e.type_ = Type::Get_Boolean(Type::TY_Scalar);
480 break;
481
 /* Integer literals: the three cases fall through intentionally to accumulate `base` (see above). */
482 case TK_HEX_INT:
483 base += 6;
484 case TK_DEC_INT:
485 base += 2;
486 case TK_OCT_INT: {
 /* A value that survives the round trip through `int32_t` gets a 4 byte integer type, any other value an
  * 8 byte integer type.
  * NOTE(review): `strtol` yields a `long`, whose width is platform dependent -- confirm this is intended. */
487 int64_t value = strtol(*e.tok.text, nullptr, base);
488 if (value == int32_t(value))
489 e.type_ = Type::Get_Integer(Type::TY_Scalar, 4);
490 else
491 e.type_ = Type::Get_Integer(Type::TY_Scalar, 8);
492 break;
493 }
494
495 case TK_DEC_FLOAT:
496 case TK_HEX_FLOAT:
497 e.type_ = Type::Get_Double(Type::TY_Scalar); // TODO: 32-bit floating-point constants
498 break;
499 }
500}
501
/* Analyzes the function application `e`: validates the callee, analyzes all arguments, looks the function up (in the
 * database in use, if any, otherwise in the catalog), infers the type of the callee designator and of the application
 * itself, and finally checks whether the function may occur in the current stage.
 * NOTE(review): several lines of this definition are not part of this listing (e.g. embedded lines 529, 537, 619,
 * 633, 647, 652, 659, 665, and the `case` labels of the final switch); see the notes at the respective places. */
502void Sema::operator()(FnApplicationExpr &e)
503{
504 SemaContext &Ctx = get_context();
505 Catalog &C = Catalog::Get();
506
507 /* Analyze function name. */
508 auto d = cast<Designator>(e.fn.get());
 /* NOTE(review): this branch is also entered when `d` is null (callee is not a designator), yet its body
  * dereferences `d` -- looks like a null pointer dereference; confirm and fix in the original file. */
509 if (not d or not d->is_identifier()) {
510 diag.e(d->attr_name.pos) << *d << " is not a valid function.\n";
511 d->type_ = e.type_ = Type::Get_Error();
512 return;
513 }
514 M_insist(bool(d));
515 M_insist(not d->type_, "This identifier has already been analyzed.");
516
517 /* Analyze arguments. */
518 for (auto &arg : e.args)
519 (*this)(*arg);
520
521 /* Lookup the function. */
 /* NOTE(review): in both lookup branches one line in front of the `return` (embedded lines 529 and 537) is not part
  * of this listing; presumably it assigns the error type -- confirm. */
522 if (C.has_database_in_use()) {
523 const auto &DB = C.get_database_in_use();
524 try {
525 e.func_ = DB.get_function(d->attr_name.text.assert_not_none());
526 } catch (std::out_of_range) {
527 diag.e(d->attr_name.pos) << "Function " << d->attr_name.text << " is not defined in database " << DB.name
528 << ".\n";
530 return;
531 }
532 } else {
533 try {
534 e.func_ = C.get_function(d->attr_name.text.assert_not_none());
535 } catch (std::out_of_range) {
536 diag.e(d->attr_name.pos) << "Function " << d->attr_name.text << " is not defined.\n";
538 return;
539 }
540 }
541 M_insist(e.func_);
542
543 /* Infer the type of the function. Functions are defined in an abstract way, where the type of the parameters is
544 * not specified. We must infer the parameter types and the return type of the function. */
545 switch (e.func_->fnid) {
546 default:
547 M_unreachable("Function not implemented");
548
549 case Function::FN_UDF:
550 diag.e(d->attr_name.pos) << "User-defined functions are not yet supported.\n";
551 d->type_ = e.type_ = Type::Get_Error();
552 return;
553
 /* MIN, MAX, SUM, and AVG share the argument validation: exactly one argument of numeric type. */
554 case Function::FN_MIN:
555 case Function::FN_MAX:
556 case Function::FN_SUM:
557 case Function::FN_AVG: {
558 if (e.args.size() == 0) {
559 diag.e(d->attr_name.pos) << "Missing argument for aggregate " << *d << ".\n";
560 d->type_ = e.type_ = Type::Get_Error();
561 return;
562 }
563 if (e.args.size() > 1) {
564 diag.e(d->attr_name.pos) << "Too many arguments for aggregate " << *d << ".\n";
565 d->type_ = e.type_ = Type::Get_Error();
566 return;
567 }
568 M_insist(e.args.size() == 1);
569 auto &arg = *e.args[0];
570 if (arg.type()->is_error()) {
571 /* skip argument of error type */
572 d->type_ = e.type_ = Type::Get_Error();
573 return;
574 }
575 if (not arg.type()->is_numeric()) {
576 /* invalid argument type */
577 diag.e(d->attr_name.pos) << "Argument of aggregate function must be of numeric type.\n";
578 d->type_ = e.type_ = Type::Get_Error();
579 return;
580 }
581 M_insist(arg.type()->is_numeric());
582 const Numeric *arg_type = cast<const Numeric>(arg.type());
 /* A scalar argument is accepted but triggers a warning. */
583 if (not arg_type->is_vectorial()) {
584 diag.w(d->attr_name.pos) << "Argument of aggregate is not of vectorial type. "
585 "(Aggregates over scalars are discouraged.)\n";
586 }
587
588 switch (e.func_->fnid) {
589 default:
590 M_unreachable("Invalid function");
591
592 case Function::FN_MIN:
593 case Function::FN_MAX: {
594 /* MIN/MAX maintain type */
595 e.type_ = arg_type->as_scalar();
596 d->type_ = Type::Get_Function(e.type_, { arg.type() });
597 break;
598 }
599
600 case Function::FN_AVG: {
601 /* AVG always uses double precision floating-point */
602 e.type_ = Type::Get_Double(Type::TY_Scalar);
603 d->type_ = Type::Get_Function(e.type_, { arg.type() });
604 break;
605 }
606
607 case Function::FN_SUM: {
608 /* SUM can overflow. Always assume type of highest precision. */
609 switch (arg_type->kind) {
610 case Numeric::N_Int:
611 e.type_ = Type::Get_Integer(Type::TY_Scalar, 8);
612 break;
613
614 case Numeric::N_Float:
615 e.type_ = Type::Get_Double(Type::TY_Scalar);
616 break;
617
 /* NOTE(review): the first line of the statement for decimals (embedded line 619) is not part
  * of this listing; only its trailing argument `arg_type->scale` is visible. */
618 case Numeric::N_Decimal:
620 arg_type->scale);
621 break;
622 }
 /* Unlike MIN/MAX/AVG, the parameter type recorded for SUM is the result type, not the argument type. */
623 d->type_ = Type::Get_Function(e.type(), { e.type() });
624 break;
625 }
626 }
627 break;
628 }
629
630 case Function::FN_COUNT: {
 /* COUNT accepts zero or one argument and always yields an 8 byte scalar integer.
  * NOTE(review): embedded line 633 in front of the `return` is not part of this listing. */
631 if (e.args.size() > 1) {
632 diag.e(d->attr_name.pos) << "Too many arguments for aggregate " << *d << ".\n";
634 return;
635 }
636
637 /* TODO If argument is given, check whether it can be NULL. If not, COUNT(arg) == COUNT(*) */
638
639 e.type_ = Type::Get_Integer(Type::TY_Scalar, 8);
640 d->type_ = Type::Get_Function(e.type_, {});
641 break;
642 }
643
644 case Function::FN_ISNULL: {
 /* ISNULL takes exactly one argument of primitive type and yields a boolean of the argument's category.
  * NOTE(review): the lines in front of the `return`s (embedded lines 647, 652, 659, 665) are not part of this
  * listing. */
645 if (e.args.size() == 0) {
646 diag.e(d->attr_name.pos) << "Missing argument for aggregate " << *d << ".\n";
648 return;
649 }
650 if (e.args.size() > 1) {
651 diag.e(d->attr_name.pos) << "Too many arguments for aggregate " << *d << ".\n";
653 return;
654 }
655 M_insist(e.args.size() == 1);
656 auto &arg = *e.args[0];
657
658 if (arg.type()->is_error()) {
660 return;
661 }
662 const PrimitiveType *arg_type = cast<const PrimitiveType>(arg.type());
663 if (not arg_type) {
664 diag.e(d->attr_name.pos) << "Function ISNULL can only be applied to expressions of primitive type.\n";
666 return;
667 }
668
669 d->type_ = Type::Get_Function(Type::Get_Boolean(arg_type->category), { arg.type() });
670 e.type_= Type::Get_Boolean(arg_type->category);
671 break;
672 }
673 }
674
 /* From here on, both the callee and the application carry a valid, non-error type. */
675 M_insist(d->type_);
676 M_insist(d->type()->is_error() or cast<const FnType>(d->type()));
677 M_insist(e.type_);
678 M_insist(not e.type()->is_error());
679 M_insist(e.type()->is_primitive());
680
 /* Stage dependent checks.  NOTE(review): the `case` labels of this switch (embedded lines 682, 685, 692, 699, 703,
  * 707, 711) are not part of this listing; the diagnostics show that aggregates are rejected in the WHERE and in the
  * GROUP BY clause. */
681 switch (Ctx.stage) {
683 M_unreachable("Function application in FROM clause is impossible");
684
686 if (e.func_->is_aggregate()) {
687 diag.e(d->attr_name.pos) << "Aggregate functions are not allowed in WHERE clause.\n";
688 return;
689 }
690 break;
691
693 if (e.func_->is_aggregate()) {
694 diag.e(d->attr_name.pos) << "Aggregate functions are not allowed in GROUP BY clause.\n";
695 return;
696 }
697 break;
698
700 /* nothing to be done */
701 break;
702
704 /* TODO */
705 break;
706
708 /* TODO */
709 break;
710
712 /* TODO */
713 break;
714 }
715}
716
/* Analyzes the unary expression `e`: the operand is analyzed first, then checked against the operator (`NOT` needs a
 * boolean operand; `+`, `-`, and `~` need a numeric operand).  A valid expression takes the type of its operand.
 * NOTE(review): the lines in front of the three early `return`s (embedded lines 724, 735, 745) are not part of this
 * listing; presumably each assigns the error type to `e` -- confirm against the original file. */
717void Sema::operator()(UnaryExpr &e)
718{
719 /* Analyze sub-expression. */
720 (*this)(*e.expr);
721
722 /* If the sub-expression is erroneous, so is this expression. */
723 if (e.expr->type()->is_error()) {
725 return;
726 }
727
728 switch (e.op().type) {
729 default:
730 M_unreachable("invalid unary expression");
731
732 case TK_Not:
733 if (not e.expr->type()->is_boolean()) {
734 diag.e(e.op().pos) << "Invalid expression " << e << " must be boolean.\n";
736 return;
737 }
738 break;
739
740 case TK_PLUS:
741 case TK_MINUS:
742 case TK_TILDE:
743 if (not e.expr->type()->is_numeric()) {
744 diag.e(e.op().pos) << "Invalid expression " << e << " must be numeric.\n";
746 return;
747 }
748 break;
749 }
750
 /* The operator does not change the type of its operand. */
751 e.type_ = e.expr->type();
752}
753
/* Analyzes the binary expression `e`: both operands are analyzed first, then checked for compatibility with the
 * operator.  Incompatible operands are reported through `diag`.
 * NOTE(review): many lines of this definition are not part of this listing: the line in front of every early `return`
 * (presumably assigning the error type) and, in the comparison and logical cases, the line following the comment
 * "... always have boolean type" (presumably assigning a boolean type of category `c` to `e`) -- confirm against the
 * original file. */
754void Sema::operator()(BinaryExpr &e)
755{
756 /* Analyze sub-expressions. */
757 (*this)(*e.lhs);
758 (*this)(*e.rhs);
759
760 /* If at least one of the sub-expressions is erroneous, so is this expression. */
761 if (e.lhs->type()->is_error() or e.rhs->type()->is_error()) {
763 return;
764 }
765
766 /* Validate that lhs and rhs are compatible with binary operator. */
767 switch (e.op().type) {
768 default:
769 M_unreachable("Invalid binary operator.");
770
771 /* Arithmetic operations are only valid for numeric types. Compute the type of the binary expression that is
772 * precise enough. */
773 case TK_PLUS:
774 case TK_MINUS:
775 case TK_ASTERISK:
776 case TK_SLASH:
777 case TK_PERCENT: {
778 /* Verify that both operands are of numeric type. */
779 const Numeric *ty_lhs = cast<const Numeric>(e.lhs->type());
780 const Numeric *ty_rhs = cast<const Numeric>(e.rhs->type());
781 if (not ty_lhs or not ty_rhs) {
782 diag.e(e.op().pos) << "Invalid expression " << e << ", operands must be of numeric type.\n";
784 return;
785 }
786 M_insist(ty_lhs);
787 M_insist(ty_rhs);
788
789 /* Compute type of the binary expression. */
790 e.type_ = e.common_operand_type = arithmetic_join(ty_lhs, ty_rhs);
791 break;
792 }
793
794 case TK_DOTDOT: {
795 /* Concatenation of two strings. */
796 auto ty_lhs = cast<const CharacterSequence>(e.lhs->type());
797 auto ty_rhs = cast<const CharacterSequence>(e.rhs->type());
798 if (not ty_lhs or not ty_rhs) {
799 diag.e(e.op().pos) << "Invalid expression " << e << ", concatenation requires string operands.\n";
801 return;
802 }
803 M_insist(ty_lhs);
804 M_insist(ty_rhs);
805
806 /* Scalar and scalar yield a scalar. Otherwise, expression yields a vectorial. */
807 Type::category_t c = std::max(ty_lhs->category, ty_rhs->category);
808
 /* The length of the concatenation is the sum of the lengths of both operands. */
809 e.type_ = Type::Get_Char(c, ty_lhs->length + ty_rhs->length);
810 break;
811 }
812
 /* Ordering comparisons: the type of the left operand selects the branch (numeric, character sequence, date, or
  * datetime) and the right operand must then be of the same kind. */
813 case TK_LESS:
814 case TK_LESS_EQUAL:
815 case TK_GREATER:
816 case TK_GREATER_EQUAL: {
817 if (auto ty_lhs = cast<const Numeric>(e.lhs->type())) {
818 /* Verify that both operands are of numeric type. */
819 auto ty_rhs = cast<const Numeric>(e.rhs->type());
820 if (not ty_lhs or not ty_rhs) {
821 diag.e(e.op().pos) << "Invalid expression " << e << ", both operands must be of numeric type.\n";
823 return;
824 }
825 M_insist(ty_lhs);
826 M_insist(ty_rhs);
827
828 /* Scalar and scalar yield a scalar. Otherwise, expression yields a vectorial. */
829 Type::category_t c = std::max(ty_lhs->category, ty_rhs->category);
830
831 /* Comparisons always have boolean type. */
833 e.common_operand_type = arithmetic_join(ty_lhs, ty_rhs);
834 } else if (auto ty_lhs = cast<const CharacterSequence>(e.lhs->type())) {
835 /* Verify that both operands are character sequences. */
836 auto ty_rhs = cast<const CharacterSequence>(e.rhs->type());
837 if (not ty_lhs or not ty_rhs) {
838 diag.e(e.op().pos) << "Invalid expression " << e << ", both operands must be strings.\n";
840 return;
841 }
842 M_insist(ty_lhs);
843 M_insist(ty_rhs);
844
845 /* Scalar and scalar yield a scalar. Otherwise, expression yields a vectorial. */
846 Type::category_t c = std::max(ty_lhs->category, ty_rhs->category);
847
848 /* Comparisons always have boolean type. */
850 } else if (auto ty_lhs = cast<const Date>(e.lhs->type())) {
851 /* Verify that both operands are dates. */
852 auto ty_rhs = cast<const Date>(e.rhs->type());
853 if (not ty_lhs or not ty_rhs) {
854 diag.e(e.op().pos) << "Invalid expression " << e << ", both operands must be dates.\n";
856 return;
857 }
858 M_insist(ty_lhs);
859 M_insist(ty_rhs);
860
861 /* Scalar and scalar yield a scalar. Otherwise, expression yields a vectorial. */
862 Type::category_t c = std::max(ty_lhs->category, ty_rhs->category);
863
864 /* Comparisons always have boolean type. */
866 } else if (auto ty_lhs = cast<const DateTime>(e.lhs->type())) {
867 /* Verify that both operands are datetimes. */
868 auto ty_rhs = cast<const DateTime>(e.rhs->type());
869 if (not ty_lhs or not ty_rhs) {
870 diag.e(e.op().pos) << "Invalid expression " << e << ", both operands must be datetimes.\n";
872 return;
873 }
874 M_insist(ty_lhs);
875 M_insist(ty_rhs);
876
877 /* Scalar and scalar yield a scalar. Otherwise, expression yields a vectorial. */
878 Type::category_t c = std::max(ty_lhs->category, ty_rhs->category);
879
880 /* Comparisons always have boolean type. */
882 } else {
883 diag.e(e.op().pos) << "Invalid expression " << e << ", operator not supported for given operands.\n";
885 return;
886 }
887 break;
888 }
889
 /* (In)equality: the operands must be comparable; for numeric operands the common operand type is recorded. */
890 case TK_EQUAL:
891 case TK_BANG_EQUAL: {
892 if (not is_comparable(e.lhs->type(), e.rhs->type())) {
893 diag.e(e.op().pos) << "Invalid expression " << e << ", operands are incomparable.\n";
895 return;
896 }
897 const PrimitiveType *ty_lhs = as<const PrimitiveType>(e.lhs->type());
898 const PrimitiveType *ty_rhs = as<const PrimitiveType>(e.rhs->type());
899
900 /* Scalar and scalar yield a scalar. Otherwise, expression yields a vectorial. */
901 Type::category_t c = std::max(ty_lhs->category, ty_rhs->category);
902
903 /* Comparisons always have boolean type. */
905 if (auto ty_lhs = cast<const Numeric>(e.lhs->type()))
906 e.common_operand_type = arithmetic_join(ty_lhs, as<const Numeric>(e.rhs->type()));
907 break;
908 }
909
 /* LIKE: both operands must be character sequences. */
910 case TK_Like: {
911 auto ty_lhs = cast<const CharacterSequence>(e.lhs->type());
912 auto ty_rhs = cast<const CharacterSequence>(e.rhs->type());
913 if (not ty_lhs or not ty_rhs) {
914 diag.e(e.op().pos) << "Invalid expression " << e << ", operands must be character sequences.\n";
916 return;
917 }
918 M_insist(ty_lhs);
919 M_insist(ty_rhs);
920
921 /* Scalar and scalar yield a scalar. Otherwise, expression yields a vectorial. */
922 Type::category_t c = std::max(ty_lhs->category, ty_rhs->category);
923
924 /* Comparisons always have boolean type. */
926 break;
927 }
928
929 case TK_And:
930 case TK_Or: {
931 const Boolean *ty_lhs = cast<const Boolean>(e.lhs->type());
932 const Boolean *ty_rhs = cast<const Boolean>(e.rhs->type());
933
934 /* Both operands must be of boolean type. */
935 if (not ty_lhs or not ty_rhs) {
936 diag.e(e.op().pos) << "Invalid expression " << e << ", operands must be of boolean type.\n";
938 return;
939 }
940
941 /* Scalar and scalar yield a scalar. Otherwise, expression yields a vectorial. */
942 Type::category_t c = std::max(ty_lhs->category, ty_rhs->category);
943
944 /* Logical operators always have boolean type. */
946 break;
947 }
948 }
949}
950
/* Analyzes the nested query `e`.  The nested statement is analyzed in its own context, which is pushed before and
 * popped after the analysis.  The nested statement must produce exactly one result of primitive type; this type
 * becomes the type of `e`.  Afterwards, stage dependent restrictions are checked.
 * NOTE(review): several lines of this definition are not part of this listing: the line in front of most early
 * `return`s (embedded lines 966, 974, 983, 996; presumably assigning the error type) and the `case` labels of the
 * switch (embedded lines 986-987 and 990) -- confirm against the original file. */
951void Sema::operator()(QueryExpr &e)
952{
953 M_insist(is<SelectStmt>(*e.query), "nested statements are always select statements");
954
 /* Remember the enclosing context before a new one is pushed for the nested statement. */
955 SemaContext &Ctx = get_context();
956
957 /* Evaluate the nested statement in a fresh sema context. */
958 push_context(*e.query, e.alias());
959 (*this)(*e.query);
960 M_insist(not contexts_.empty());
961 SemaContext inner_ctx = pop_context();
962
963 /* TODO an EXISTS operator allows multiple results */
964 if (1 != inner_ctx.results.size()) {
965 diag.e(e.tok.pos) << "Invalid expression:\n" << e << ",\nnested statement must return a single column.\n";
967 return;
968 }
969 M_insist(1 == inner_ctx.results.size());
970 Expr &res = inner_ctx.results.begin()->second.expr();
971
972 if (not res.type()->is_primitive()) {
973 diag.e(e.tok.pos) << "Invalid expression:\n" << e << ",\nnested statement must return a primitive value.\n";
975 return;
976 }
977 auto *pt = as<const PrimitiveType>(res.type_);
978 e.type_ = pt;
979
 /* The restrictions depend on the stage of the enclosing context. */
980 switch (Ctx.stage) {
981 default: {
982 diag.e(e.tok.pos) << "Nested statements are not allowed in this stage.\n";
984 return;
985 }
988 /* TODO The result must not be a single scalar value in general. */
989
991 /* The result of the nested query must be a single scalar value. */
992
993 if (not pt->is_scalar()) {
994 diag.e(e.tok.pos) << "Invalid expression:\n" << e
995 << ",\nnested statement must return a scalar value.\n";
997 return;
998 }
999
1000 auto is_fn = is<FnApplicationExpr>(res);
1001 auto is_const = res.is_constant();
1002 auto &q = as<const SelectStmt>(*e.query);
1003 /* The result is a single value iff it is a constant and there is no from clause or
1004 * iff it is an aggregate and there is no group_by clause. */
1005 if (not(is_const and not q.from) and not(is_fn and not q.group_by)) {
1006 diag.e(e.tok.pos) << "Invalid expression:\n" << e
1007 << ",\nnested statement must return a single value.\n";
1008 e.type_ = Type::Get_Error();
1009 return;
1010 }
1011 break;
1012 }
1013 }
1014}
1015
1016/*===== Clause =======================================================================================================*/
1017
1018void Sema::operator()(ErrorClause&)
1019{
1020 /* nothing to be done */
1021}
1022
/**
 * Semantic analysis of the SELECT clause.
 *
 * First expands a `SELECT *` (if present) into designators that are appended to `c.expanded_select_all`, then analyzes
 * every explicitly selected expression.  Each selected value is registered under a name in `Ctx.results` together with
 * its position in the result.  While doing so, the function tracks whether scalar and vectorial values occur, and
 * reports an error if both are mixed.
 */
1023void Sema::operator()(SelectClause &c)
1024{
1025 SemaContext &Ctx = get_context();
1026 Ctx.stage = SemaContext::S_Select;
1027 Catalog &C = Catalog::Get();
1028
 /* Scalar/vector-ness bookkeeping for the final "mixed" check, and counters used to name and order results. */
1029 bool has_vectorial = false;
1030 bool has_scalar = false;
1031 uint64_t const_counter = 0;
1032 M_insist(Ctx.results.empty());
1033 unsigned result_counter = 0;
1034
1035 if (c.select_all) {
1036 /* Expand the `SELECT *` by creating dummy expressions for all accessible values of all sources. */
1037 auto &stmt = as<const SelectStmt>(Ctx.stmt);
1038
1039 if (stmt.group_by) {
1040 /* If the statement contains a GROUP BY clause, we must include all grouping keys in the result. */
1041 auto &group_by = as<const GroupByClause>(*stmt.group_by);
1042 has_scalar = has_scalar or not group_by.group_by.empty();
1043 for (auto &[expr, alias] : group_by.group_by) {
1044 std::unique_ptr<Designator> d;
1045 if (alias) { // alias was given
1046 d = create_designator(alias.text.assert_not_none(), expr->tok, *expr);
1047 } else if (auto D = cast<const ast::Designator>(expr.get())) { // no alias, but designator -> keep name
1048 d = create_designator(D->attr_name.text.assert_not_none(), D->tok, *D);
1049 } else { // no designator, no alias -> derive name
 /* This local stream is distinct from the `oss` used further below, which is not declared in this function. */
1050 std::ostringstream oss;
1051 oss << *expr;
1052 d = create_designator(C.pool(oss.str().c_str()), expr->tok, *expr);
1053 }
 /* The new designator is given the scalar variant of its (primitive) type. */
1054 if (auto ty = cast<const PrimitiveType>(d->type()))
1055 d->type_ = ty->as_scalar();
1056 else
1057 M_insist(d->type()->is_error(), "grouping key must be of primitive type");
1058 auto attr_name = d->attr_name.text.assert_not_none();
1059 auto &ref = c.expanded_select_all.emplace_back(std::move(d));
1060 (*this)(*ref);
1061 Ctx.results.emplace(std::move(attr_name), SemaContext::result_t(*ref, result_counter++, alias.text));
1062 }
1063 } else if (stmt.having) {
1064 /* A statement with a HAVING clause but without a GROUP BY clause may only have literals in its SELECT
1065 * clause. Therefore, '*' has no meaning and we should emit a warning. */
1066 diag.w(c.select_all.pos) << "The '*' has no meaning in this query. Did you forget the GROUP BY clause?.\n";
1067 } else {
1068 /* The '*' in the SELECT clause selects all attributes of all sources. */
1069 for (auto &[src_name, src] : Ctx.sources) {
1070 if (auto ref = std::get_if<std::reference_wrapper<const Table>>(&src.first)) {
1071 /* The source is a database table. */
1072 auto &tbl = ref->get();
1073 for (auto &attr : tbl) {
1074 auto d = create_designator(
1075 /* pos= */ c.select_all.pos,
1076 /* table_name= */ src_name,
1077 /* attr_name= */ attr.name,
1078 /* target= */ &attr,
1079 /* type= */ attr.type
1080 );
 /* Note: this `ref` shadows the `ref` of the enclosing `if`. */
1081 auto &ref = c.expanded_select_all.emplace_back(std::move(d));
1082 (*this)(*ref);
1083 Ctx.results.emplace(attr.name, SemaContext::result_t(*ref, result_counter++));
1084 }
1085 has_vectorial = true;
1086 } else {
1087 /* The source is a nested query. */
1088 auto &named_exprs = std::get<SemaContext::named_expr_table>(src.first);
 /* Designators are first collected by their position `pos` in this local vector, then appended in that
  * order to `c.expanded_select_all` after the loop. */
1089 std::vector<std::unique_ptr<Expr>> expanded_select_all(named_exprs.size());
1090 for (auto &[name, expr_w_pos] : named_exprs) {
1091 auto &[expr, pos] = expr_w_pos;
1092 auto d = create_designator(
1093 /* pos= */ c.select_all.pos,
1094 /* table_name= */ src_name,
1095 /* attr_name= */ name,
1096 /* target= */ &expr.get(),
1097 /* type= */ expr.get().type()
1098 );
1099 auto &ref = (expanded_select_all[pos] = std::move(d));
1100 (*this)(*ref);
1101 if (auto pt = cast<const PrimitiveType>(ref->type())) {
1102 has_scalar = has_scalar or pt->is_scalar();
1103 has_vectorial = has_vectorial or pt->is_vectorial();
1104 } else {
1105 M_insist(ref->type()->is_error(), "result of nested query must be of primitive type");
1106 }
1107 Ctx.results.emplace(name, SemaContext::result_t(*ref, result_counter + pos));
1108 }
1109 result_counter += named_exprs.size();
1110 for (auto &e : expanded_select_all) c.expanded_select_all.emplace_back(std::move(e));
1111 }
1112 }
1113 }
1114 }
1115
 /* Analyze the explicitly listed select expressions. */
1116 for (auto it = c.select.begin(), end = c.select.end(); it != end; ++it) {
1117 auto &select_expr = *it->first;
 /* NOTE(review): `alias` is a by-value copy of `it->second`. */
1118 auto alias = it->second;
1119
1120 (*this)(select_expr); // recursively analyze select expression
1121 if (select_expr.contains_free_variables() and not is<QueryExpr>(select_expr))
1122 diag.e(select_expr.tok.pos) << select_expr << " contains free variables (not yet supported).\n";
1123
1124 if (select_expr.type()->is_error()) continue;
1125
1126 /* Expressions *must* be scalar when we have grouping. */
1127 if (auto pt = cast<const PrimitiveType>(select_expr.type()); Ctx.needs_grouping and pt and pt->is_vectorial()) {
1128 diag.e(select_expr.tok.pos) << select_expr << " is not scalar.\n";
1129 continue;
1130 }
1131
1132 /* Constants and scalar values of nested queries can be broadcast from scalar to vectorial. We collect the
1133 * scalar/vector-ness information of each expression in the SELECT clause. */
1134 if (not select_expr.is_constant() and not is<QueryExpr>(select_expr)) {
1135 auto pt = as<const PrimitiveType>(select_expr.type());
1136 has_vectorial = has_vectorial or pt->is_vectorial();
1137 has_scalar = has_scalar or pt->is_scalar();
1138 }
1139
1140 if (alias) { // SELECT expression has alias?
1141 /* Expression with alias. */
1142 Ctx.results.emplace(alias.text, SemaContext::result_t(select_expr, result_counter++, alias.text));
 /* Matches select entries that carry the same alias text; counted over the entries *preceding* `it`. */
1143 auto pred = [&](const std::pair<std::unique_ptr<Expr>, Token> &sel) {
1144 return sel.second.text == alias.text;
1145 };
1146 if (auto num = std::count_if(c.select.begin(), it, pred)) {
1147 /* Found ambiguous alias which is only allowed without accessing it. This is checked via the `Ctx`
1148 * in which the ambiguous alias is contained. However, make alias unique for later accessing steps. */
1149 oss.str("");
1150 oss << alias.text << "$" << num;
 /* NOTE(review): this assigns to the local copy `alias`, which is not read again in this iteration; the
  * token stored in `c.select` is unchanged.  Confirm whether the rename is meant to be persisted. */
1151 alias.text = C.pool(oss.str().c_str());
1152 }
1153 } else if (auto d = cast<Designator>(&select_expr)) {
1154 /* Expression is a designator. Simply reuse the name without table prefix. */
1155 Ctx.results.emplace(d->attr_name.text, SemaContext::result_t(*d, result_counter++));
1156 } else {
1157 M_insist(not is<Designator>(select_expr));
1158 /* Expression without alias. Print expression as string to get a name. Use '$const' as prefix for
1159 * constants. */
1160 oss.str("");
1161 if (select_expr.is_constant())
1162 oss << "$const" << const_counter++;
1163 else
1164 oss << select_expr;
1165 Ctx.results.emplace(C.pool(oss.str().c_str()), SemaContext::result_t(select_expr, result_counter++));
1166 }
1167 }
1168
1169 if (has_vectorial and has_scalar)
1170 diag.e(c.tok.pos) << "SELECT clause with mixed scalar and vectorial values is forbidden.\n";
1171}
1172
/**
 * Semantic analysis of the FROM clause.
 *
 * Registers every source of the clause in `Ctx.sources`, keyed by its alias if given, otherwise by the table name.  A
 * source is either a table of the database in use or a nested statement, which is analyzed in its own sema context.
 * Reports an error if a table does not exist or if a source name is already in use.
 *
 * NOTE(review): the embedded listing number 1217 is skipped; presumably that dropped line declares the local `results`
 * that is filled and moved below -- confirm against the real file.
 */
1173void Sema::operator()(FromClause &c)
1174{
1175 SemaContext &Ctx = get_context();
1176 Ctx.stage = SemaContext::S_From;
1177
1178 Catalog &C = Catalog::Get();
1179 const auto &DB = C.get_database_in_use();
1180
1181 M_insist(Ctx.sources.empty());
 /* Running index that records the order in which sources are added. */
1182 unsigned source_counter = 0;
1183
1184 /* Check whether the source tables in the FROM clause exist in the database. Add the source tables to the current
1185 * context, using their alias if provided (e.g. FROM src AS alias). */
1186 for (auto &src: c.from) {
1187 if (auto name = std::get_if<Token>(&src.source)) {
 /* The source is named by a token: look the table up in the database in use. */
1188 try {
1189 const Table &T = DB.get_table(name->text.assert_not_none());
1190 Token table_name = src.alias ? src.alias : *name; // FROM name AS alias ?
1191 auto res = Ctx.sources.emplace(table_name.text, std::make_pair(std::ref(T), source_counter++));
1192 /* Check if the table name is already in use in other contexts. */
1193 bool unique = true;
 /* Visits all contexts except the last one in `contexts_`; contexts in stage S_From are skipped. */
1194 for (std::size_t i = 0; i < contexts_.size() - 1; ++i) {
1195 if (contexts_[i]->stage == SemaContext::S_From) continue;
1196 if (contexts_[i]->sources.contains(table_name.text.assert_not_none())) {
1197 unique = false;
1198 break;
1199 }
1200 }
 /* `res.second` is false if the emplace above did not insert, i.e. the name already exists in this context. */
1201 if (not res.second or not unique)
1202 diag.e(table_name.pos) << "Table name " << table_name.text << " already in use.\n";
1203 src.table_ = &T;
1204 } catch (std::out_of_range) {
 /* The first unknown table aborts the analysis of the entire clause. */
1205 diag.e(name->pos) << "No table " << name->text << " in database " << DB.name << ".\n";
1206 return;
1207 }
1208 } else if (auto stmt = std::get_if<Stmt*>(&src.source)) {
1209 M_insist(is<SelectStmt>(*stmt), "nested statements are always select statements");
1210
1211 /* Evaluate the nested statement in a fresh sema context. */
1212 push_context(**stmt, src.alias.text);
1213 (*this)(**stmt);
1214 M_insist(not contexts_.empty());
1215 SemaContext inner_ctx = pop_context();
1216
 /* Collect name -> (expression, order) for every result of the nested statement. */
1218 for (auto &[name, res] : inner_ctx.results)
1219 results.emplace(name, std::make_pair(std::ref(res.expr()), res.order));
1220
1221 /* Add the results of the nested statement to the list of sources. */
1222 auto res = Ctx.sources.emplace(src.alias.text, std::make_pair(std::move(results), source_counter++));
1223 /* Convert scalar results to vectorials. */
1224 for (auto &[_, result] : inner_ctx.results)
1225 result.expr().type_ = as<const PrimitiveType>(result.expr().type())->as_vectorial();
1226 /* Check if the table name is already in use in other contexts. */
1227 bool unique = true;
1228 for (std::size_t i = 0; i < contexts_.size() - 1; ++i) {
1229 if (contexts_[i]->stage == SemaContext::S_From) continue;
1230 if (contexts_[i]->sources.contains(src.alias.text.assert_not_none())) {
1231 unique = false;
1232 break;
1233 }
1234 }
1235 if (not res.second or not unique) {
1236 diag.e(src.alias.pos) << "Table name " << src.alias.text << " already in use.\n";
1237 return;
1238 }
1239 } else {
1240 M_unreachable("invalid variant");
1241 }
1242 }
1243}
1244
1245void Sema::operator()(WhereClause &c)
1246{
1247 SemaContext &Ctx = get_context();
1248 Ctx.stage = SemaContext::S_Where;
1249
1250 /* Analyze expression. */
1251 (*this)(*c.where);
1252
1253 if (c.where->type()->is_error())
1254 return; /* nothing to be done */
1255
1256 const Boolean *ty = cast<const Boolean>(c.where->type());
1257
1258 /* WHERE condition must be of boolean type. */
1259 if (not ty) {
1260 diag.e(c.tok.pos) << "The expression in the WHERE clause must be of boolean type.\n";
1261 return;
1262 }
1263}
1264
1265void Sema::operator()(GroupByClause &c)
1266{
1267 Catalog &C = Catalog::Get();
1268 SemaContext &Ctx = get_context();
1269 Ctx.stage = SemaContext::S_GroupBy;
1270
1271 Ctx.needs_grouping = true;
1272 for (auto &[expr, alias] : c.group_by) {
1273 (*this)(*expr);
1274
1275 /* Skip errors. */
1276 if (expr->type()->is_error())
1277 continue;
1278
1279 if (expr->contains_free_variables())
1280 diag.e(expr->tok.pos) << *expr << " contains free variable(s) (not yet supported).\n";
1281
1282 const PrimitiveType *pt = cast<const PrimitiveType>(expr->type());
1283
1284 /* Can only group by expressions of primitive type. */
1285 if (not pt) {
1286 diag.e(c.tok.pos) << "Cannot group by " << *expr << ", has invalid type.\n";
1287 continue;
1288 }
1289
1290 /* Can only group by vectorials. The expression in the GROUP BY clause must be evaluated per tuple. */
1291 if (not pt->is_vectorial()) {
1292 diag.e(c.tok.pos) << "Cannot group by " << *expr << ". Expressions in the GROUP BY clause must be "
1293 "vectorial, i.e. they must depend on each row separately.\n";
1294 continue;
1295 }
1296
1297 /* Add expression to list of grouping keys. */
1298 if (alias) {
1299 Ctx.grouping_keys.emplace(alias.text, *expr);
1300 } else if (auto d = cast<Designator>(expr.get())) {
1301 Ctx.grouping_keys.emplace(d->attr_name.text, *expr);
1302 } else {
1303 oss.str("");
1304 oss << *expr;
1305 Ctx.grouping_keys.emplace(C.pool(oss.str().c_str()), *expr);
1306 }
1307 }
1308}
1309
1310void Sema::operator()(HavingClause &c)
1311{
1312 SemaContext &Ctx = get_context();
1313 Ctx.stage = SemaContext::S_Having;
1314 Ctx.needs_grouping = true;
1315
1316 (*this)(*c.having);
1317
1318 /* Skip errors. */
1319 if (c.having->type()->is_error())
1320 return;
1321
1322 const Boolean *ty = cast<const Boolean>(c.having->type());
1323
1324 /* HAVING condition must be of boolean type. */
1325 if (not ty) {
1326 diag.e(c.tok.pos) << "The expression in the HAVING clause must be of boolean type.\n";
1327 return;
1328 }
1329
1330 if (not ty->is_scalar()) {
1331 diag.e(c.tok.pos) << "The expression in the HAVING clause must be scalar.\n";
1332 return;
1333 }
1334
1335 /* TODO The HAVING clause must be a conjunction or disjunction of aggregates or comparisons of grouping keys. */
1336}
1337
1338void Sema::operator()(OrderByClause &c)
1339{
1340 SemaContext &Ctx = get_context();
1341 Ctx.stage = SemaContext::S_OrderBy;
1342
1343 /* Analyze all ordering expressions. */
1344 for (auto &o : c.order_by) {
1345 auto &e = o.first;
1346 (*this)(*e);
1347
1348 if (e->type()->is_error()) continue;
1349 if (e->contains_free_variables())
1350 diag.e(e->tok.pos) << *e << " contains free variable(s) (not yet supported).\n";
1351
1352 auto pt = as<const PrimitiveType>(e->type());
1353
1354 if (Ctx.needs_grouping) { // w/ grouping
1355 /* If we grouped, the grouping keys now have scalar type. */
1356 if (pt->is_vectorial())
1357 diag.e(c.tok.pos) << "Cannot order by " << *e << ", expression must be scalar.\n";
1358 } else { // w/o grouping
1359 /* If we did not group, the ordering expressions must be vectorial. */
1360 if (pt->is_scalar())
1361 diag.e(c.tok.pos) << "Cannot order by " << *e << ", expression must be vectorial.\n";
1362 }
1363 }
1364}
1365
1366void Sema::operator()(LimitClause &c)
1367{
1368 SemaContext &Ctx = get_context();
1369 Ctx.stage = SemaContext::S_Limit;
1370
1371 /* TODO limit only makes sense when SELECT is vectorial and not scalar */
1372
1373 errno = 0;
1374 strtoull(*c.limit.text, nullptr, 0);
1375 if (errno == EINVAL)
1376 diag.e(c.limit.pos) << "Invalid value for LIMIT.\n";
1377 else if (errno == ERANGE)
1378 diag.e(c.limit.pos) << "Value of LIMIT out of range.\n";
1379 else if (errno != 0)
1380 diag.e(c.limit.pos) << "Invalid LIMIT.\n";
1381
1382 if (c.offset) {
1383 errno = 0;
1384 strtoull(*c.offset.text, nullptr, 0);
1385 if (errno == EINVAL)
1386 diag.e(c.offset.pos) << "Invalid value for OFFSET.\n";
1387 else if (errno == ERANGE)
1388 diag.e(c.offset.pos) << "Value of OFFSET out of range.\n";
1389 else if (errno != 0)
1390 diag.e(c.offset.pos) << "Invalid OFFSET.\n";
1391 }
1392}
1393
1394
1395/*===== Instruction ==================================================================================================*/
1396
/**
 * Semantic analysis of an instruction.  Reports an error naming the instruction if `std::invalid_argument` is thrown
 * inside the `try` block.
 *
 * NOTE(review): the embedded listing number 1400 is skipped, so the statement inside the `try` block is not visible in
 * this listing; presumably it looks up or creates the instruction via the catalog `C` -- confirm against the real file.
 */
1397void Sema::operator()(Instruction &I) {
1398 Catalog &C = Catalog::Get();
1399 try {
1401 } catch (std::invalid_argument) {
1402 diag.e(I.tok.pos) << "Instruction " << I.name << " unknown\n";
1403 }
1404}
1405
1406
1407/*===== Stmt =========================================================================================================*/
1408
1409void Sema::operator()(ErrorStmt&)
1410{
1411 /* nothing to be done */
1412}
1413
1414void Sema::operator()(EmptyStmt&)
1415{
1416 command_ = std::make_unique<EmptyCommand>();
1417}
1418
1419void Sema::operator()(CreateDatabaseStmt &s)
1420{
1421 RequireContext RCtx(this, s);
1422 Catalog &C = Catalog::Get();
1423 auto db_name = s.database_name.text.assert_not_none();
1424
1425 if (not C.has_database(db_name))
1426 command_ = std::make_unique<CreateDatabase>(std::move(db_name));
1427 else
1428 diag.e(s.database_name.pos) << "Database " << db_name << " already exists.\n";
1429}
1430
1431void Sema::operator()(DropDatabaseStmt &s)
1432{
1433 RequireContext RCtx(this, s);
1434 Catalog &C = Catalog::Get();
1435 auto db_name = s.database_name.text.assert_not_none();
1436
1437 if (C.has_database_in_use()) {
1438 if (C.get_database_in_use().name == db_name) {
1439 diag.e(s.database_name.pos) << "Database " << db_name << " is in use.\n";
1440 return;
1441 }
1442 }
1443
1444 if (C.has_database(db_name))
1445 command_ = std::make_unique<DropDatabase>(std::move(db_name));
1446 else {
1447 if (s.has_if_exists)
1448 command_ = std::make_unique<EmptyCommand>();
1449 else
1450 diag.e(s.database_name.pos) << "Database " << db_name << " does not exist.\n";
1451 }
1452}
1453
1454void Sema::operator()(UseDatabaseStmt &s)
1455{
1456 RequireContext RCtx(this, s);
1457 Catalog &C = Catalog::Get();
1458 auto db_name = s.database_name.text.assert_not_none();
1459
1460 if (C.has_database(db_name))
1461 command_ = std::make_unique<UseDatabase>(std::move(db_name));
1462 else
1463 diag.e(s.database_name.pos) << "Database " << db_name << " does not exist.\n";
1464}
1465
/**
 * Semantic analysis of `CREATE TABLE`.
 *
 * Builds a new table via the catalog's table factory, adds every declared attribute to it, and checks the attribute
 * constraints (PRIMARY KEY, UNIQUE, NOT NULL, CHECK, REFERENCES).  A `CreateTable` command owning the new table is
 * created only if the statement is not nested and no errors have been reported.
 *
 * NOTE(review): the embedded listing number 1517 is skipped, i.e. one source line directly before the loop over the
 * constraints is not visible in this listing -- confirm against the real file.
 */
1466void Sema::operator()(CreateTableStmt &s)
1467{
1468 RequireContext RCtx(this, s);
1469 Catalog &C = Catalog::Get();
1470
1471 if (not C.has_database_in_use()) {
1472 diag.err() << "No database selected.\n";
1473 return;
1474 }
1475 auto &DB = C.get_database_in_use();
1476 auto table_name = s.table_name.text.assert_not_none();
1477 std::unique_ptr<Table> T = C.table_factory().make(table_name);
1478
1479 /* Add the newly declared table to the list of sources of the sema context. We need to add the table to the sema
1480 * context so that semantic analysis of `CHECK` expressions can resolve references to attributes of the same table.
1481 * */
1482 get_context().sources.emplace(table_name, std::make_pair(SemaContext::source_type(*T), 0U));
1483
1484 /* Verify table does not yet exist. */
 /* If `get_table()` does not throw, the table already exists and the error is reported; analysis continues. */
1485 try {
1486 DB.get_table(table_name);
1487 diag.e(s.table_name.pos) << "Table " << table_name << " already exists in database " << DB.name << ".\n";
1488 } catch (std::out_of_range) {
1489 /* nothing to be done */
1490 }
1491
1492 /* Analyze attributes and add them to the new table. */
1493 bool has_primary_key = false;
1494 for (auto &attr : s.attributes) {
1495 auto attribute_name = attr->name.text.assert_not_none();
1496 const PrimitiveType *ty = cast<const PrimitiveType>(attr->type);
1497 if (not ty) {
1498 diag.e(attr->name.pos) << "Attribute " << attr->name.text << " cannot be defined with type " << *attr->type
1499 << ".\n";
1500 return;
1501 }
1502 attr->type = ty->as_vectorial(); // convert potentially scalar type to vectorial
1503
1504 /* Before we check the constraints, we must add this newly declared attribute to its table, and hence to the
1505 * sema context. */
1506 try {
1507 T->push_back(attribute_name, ty->as_vectorial());
1508 } catch (std::invalid_argument) {
1509 /* attribute name is a duplicate */
 /* NOTE(review): the message below spells "defintion"; it is a runtime string and left untouched here. */
1510 diag.e(attr->name.pos) << "Attribute " << attr->name.text << " occurs multiple times in defintion of table "
1511 << table_name << ".\n";
1512 }
1513
1514 /* Check constraint definitions. */
 /* Per-attribute flags used to detect constraints that are repeated on the same attribute. */
1515 bool has_reference = false;
1516 bool is_unique = false, is_not_null = false;
1518 for (auto &c : attr->constraints) {
1519 if (is<PrimaryKeyConstraint>(c)) {
 /* `has_primary_key` is shared by all attributes of the statement; a second PRIMARY KEY is an error. */
1520 if (has_primary_key)
1521 diag.e(attr->name.pos) << "Duplicate definition of primary key as attribute " << attr->name.text
1522 << ".\n";
1523 has_primary_key = true;
1524 T->add_primary_key(attribute_name);
1525 }
1526
1527 if (is<UniqueConstraint>(c)) {
 /* A repeated UNIQUE only yields a warning. */
1528 if (is_unique)
1529 diag.w(c->tok.pos) << "Duplicate definition of attribute " << attr->name.text << " as UNIQUE.\n";
1530 is_unique = true;
1531 T->at(attribute_name).unique = true;
1532 }
1533
1534 if (is<NotNullConstraint>(c)) {
 /* A repeated NOT NULL only yields a warning. */
1535 if (is_not_null)
1536 diag.w(c->tok.pos) << "Duplicate definition of attribute " << attr->name.text << " as NOT NULL.\n";
1537 is_not_null = true;
1538 T->at(attribute_name).not_nullable = true;
1539 }
1540
1541 if (auto check = cast<CheckConditionConstraint>(c)) {
1542 /* Verify that the type of the condition is boolean. */
1543 /* TODO if the condition uses already mentioned attributes, we must add them to the sema context before
1544 * invoking semantic analysis of the condition! */
1545 (*this)(*check->cond);
1546 auto ty = check->cond->type();
1547 if (not ty->is_boolean())
1548 diag.e(check->tok.pos) << "Condition " << *check->cond << " is an invalid CHECK constraint.\n";
1549 }
1550
1551 if (auto ref = cast<ReferenceConstraint>(c)) {
1552 if (has_reference)
1553 diag.e(ref->tok.pos) << "Attribute " << attr->name.text << " must not have multiple references.\n";
1554 has_reference = true;
1555
1556 /* Check that the referenced attribute exists. */
 /* The outer `try` covers the lookup of the referenced table, the inner one the lookup of the attribute. */
1557 try {
1558 auto &ref_table = DB.get_table(ref->table_name.text.assert_not_none());
1559 try {
1560 auto &ref_attr = ref_table.at(ref->attr_name.text.assert_not_none());
1561 if (attr->type != ref_attr.type)
1562 diag.e(ref->attr_name.pos) << "Referenced attribute has different type.\n";
1563 T->at(attr->name.text.assert_not_none()).reference = &ref_attr;
1564 } catch (std::out_of_range) {
1565 diag.e(ref->attr_name.pos) << "Invalid reference, attribute " << ref->attr_name.text
1566 << " not found in table " << ref->table_name.text << ".\n";
1567 }
1568 } catch (std::out_of_range) {
1569 diag.e(ref->table_name.pos) << "Invalid reference, table " << ref->table_name.text
1570 << " not found.\n";
1571 }
1572 }
1573 }
1574 }
1575
 /* Hand the table over to the command only for a top-level statement without any reported error. */
1576 if (not is_nested() and not diag.num_errors())
1577 command_ = std::make_unique<CreateTable>(std::move(T));
1578}
1579
1580void Sema::operator()(DropTableStmt &s)
1581{
1582 RequireContext RCtx(this, s);
1583 Catalog &C = Catalog::Get();
1584
1585 if (not C.has_database_in_use()) {
1586 diag.err() << "No database selected.\n";
1587 return;
1588 }
1589 auto &DB = C.get_database_in_use();
1590
1591 bool ok = true;
1592 std::vector<ThreadSafePooledString> table_names;
1593 for (auto &tok : s.table_names) {
1594 auto table_name = tok->text.assert_not_none();
1595 if (DB.has_table(table_name))
1596 table_names.emplace_back(std::move(table_name));
1597 else {
1598 if (not s.has_if_exists) {
1599 diag.e(tok->pos) << "Table " << table_name << " does not exist in database " << DB.name << ".\n";
1600 ok = false;
1601 } else {
1602 diag.n(tok->pos) << "Table " << table_name << " does not exist in database " << DB.name << ". "
1603 << "Skipping.\n";
1604 }
1605 }
1606 }
1607 if (ok)
1608 command_ = std::make_unique<DropTable>(std::move(table_names));
1609}
1610
/**
 * Semantic analysis of `CREATE INDEX`.
 *
 * Rejects unsupported forms (UNIQUE, unnamed index, more than one key field, non-attribute key field, unknown index
 * method), verifies that the index name is new and that table and attribute exist, and then instantiates an index
 * whose key type is chosen from the type of the indexed attribute.  On success, a `CreateIndex` command is created.
 *
 * NOTE(review): the embedded listing number 1714 is skipped; the call that is closed by `}, *attribute.type);` below
 * (presumably a `visit(overloaded { ...` over the attribute type) opens on that dropped line -- confirm against the
 * real file.
 */
1611void Sema::operator()(CreateIndexStmt &s)
1612{
1613 RequireContext RCtx(this, s);
1614 Catalog &C = Catalog::Get();
1615
1616 if (not C.has_database_in_use()) {
1617 diag.err() << "No database selected.\n";
1618 return;
1619 }
1620 auto &DB = C.get_database_in_use();
1621
1622 /* Check if `UNIQUE` was present in statement. */
1623 if (s.has_unique) {
1624 diag.e(s.has_unique.pos) << "Keyword UNIQUE not supported.\n";
1625 return;
1626 }
1627
1628 /* Check that an index name is set. */
1629 if (not s.index_name) {
1630 diag.err() << "Indexes without name not supported.\n";
1631 return;
1632 }
1633
1634 /* Check that the index name does not yet exist. */
1635 auto index_name = s.index_name.text.assert_not_none();
1636 if (DB.has_index(index_name)) {
 /* With IF NOT EXISTS an existing index is only a warning and the statement becomes a no-op command. */
1637 if (s.has_if_not_exists) {
1638 diag.w(s.index_name.pos) << "Index " << index_name << " already exists in database " << DB.name
1639 << ". Skipping.\n";
1640 command_ = std::make_unique<EmptyCommand>();
1641 return;
1642 } else {
1643 diag.e(s.index_name.pos) << "Index " << index_name << " already exists in database " << DB.name << ".\n";
1644 return;
1645 }
1646 }
1647
1648 /* Check that the table exists. */
1649 auto table_name = s.table_name.text.assert_not_none();
1650 if (not DB.has_table(table_name)) {
 /* NOTE(review): this message ends in "\n." whereas the other messages end in ".\n" -- likely transposed. */
1651 diag.e(s.table_name.pos) << "Table " << table_name << " does not exist in database " << DB.name << "\n.";
1652 return;
1653 }
1654 auto &table = DB.get_table(table_name);
1655
1656 /* Check that the index method exists. */
1657 if (not s.method) { // if method is not set, set to default
1658 s.method = Token::CreateArtificial(TK_Default);
1659 s.method.pos = s.table_name.pos;
1660 }
1661 switch(s.method.type) {
1662 case TK_Default: // ok
1663 break;
1664
1665 case TK_IDENTIFIER:
1666 if (s.method.text.assert_not_none() == C.pool("array")) // ok
1667 break;
1668 else if (s.method.text == C.pool("rmi")) // ok
1669 break;
1670 else { // unknown method, not ok
1671 diag.e(s.method.pos) << "Index method " << s.method.text << " not supported.\n";
1672 return;
1673 }
1674
1675 default: // unknown token type, not ok
1676 diag.e(s.method.pos) << "Index method " << s.method.text << " not supported.\n";
1677 return;
1678 }
1679
1680 /* Check that at most one key field is set. */
1681 if (s.key_fields.size() > 1) {
1682 diag.err() << "More than one key field for indexes not supported.\n";
1683 return;
1684 }
1685
1686 /* Compute attribute from key field. */
1687 for (auto it = s.key_fields.cbegin(), end = s.key_fields.cend(); it != end; ++it) {
1688 auto field = it->get();
1689 if (auto d = cast<Designator>(field)) {
1690 if (not table.has_attribute(d->attr_name.text.assert_not_none())) {
1691 diag.e(d->tok.pos) << "Attribute " << d->attr_name.text << " does not exists in table "
1692 << table_name << ".\n";
1693 return;
1694 }
1695 } else {
1696 diag.e(field->tok.pos) << "Non-attribute key fields for indexes not supported.\n";
1697 return;
1698 }
1699 }
 /* NOTE(review): `front()` requires at least one key field, but the check above only rejects more than one.
  * Presumably the parser guarantees a non-empty list -- confirm. */
1700 auto attribute_name = cast<Designator>(s.key_fields.front())->attr_name.text.assert_not_none();
1701 auto &attribute = table.at(attribute_name);
1702
1703 /* Build index based on selected method and key type. */
1704 std::unique_ptr<idx::IndexBase> index;
 /* Creates an `Index<Key>` if that type can be named; otherwise emits a diagnostic and yields a null pointer. */
1705 auto make_index = [&]<template<typename> typename Index, typename Key>() {
1706 if constexpr(requires { typename Index<Key>; }) {
1707 return std::make_unique<Index<Key>>();
1708 } else {
1709 diag(s.method.pos) << "Index method not available for given key type.\n";
1710 return nullptr;
1711 }
1712 };
 /* Selects the key type from the type of the indexed attribute and stores the created index in `index`. */
1713 auto set_index = [&]<template<typename> typename Index>() {
1715 [&](const Boolean&) { index = make_index.operator()<Index, bool>(); },
1716 [&](const Numeric &n) {
1717 switch (n.kind) {
 /* Integers and decimals use a signed integer key whose width is given by `n.size()`. */
1718 case Numeric::N_Int:
1719 case Numeric::N_Decimal:
1720 switch (n.size()) {
1721 default: M_unreachable("invalid size");
1722 case 8: index = make_index.operator()<Index, int8_t>(); break;
1723 case 16: index = make_index.operator()<Index, int16_t>(); break;
1724 case 32: index = make_index.operator()<Index, int32_t>(); break;
1725 case 64: index = make_index.operator()<Index, int64_t>(); break;
1726 }
1727 break;
1728 case Numeric::N_Float:
1729 switch (n.size()) {
1730 default: M_unreachable("invalid size");
1731 case 32: index = make_index.operator()<Index, float>(); break;
1732 case 64: index = make_index.operator()<Index, double>(); break;
1733 }
1734 }
1735 },
1736 [&](const CharacterSequence&) { index = make_index.operator()<Index, const char*>(); },
 /* Date and DateTime keys bypass `make_index` and are created directly. */
1737 [&](const Date&) { index = std::make_unique<Index<int32_t>>(); },
1738 [&](const DateTime&) { index = std::make_unique<Index<int64_t>>(); },
1739 [](auto&&) { M_unreachable("invalid type"); },
1740 }, *attribute.type);
1741 };
 /* Dispatch on the index method; unsupported methods were already rejected above. */
1742 switch(s.method.type) {
1743 case TK_Default: set_index.operator()<idx::ArrayIndex>(); break;
1744 case TK_IDENTIFIER:
1745 if (s.method.text.assert_not_none() == C.pool("array"))
1746 set_index.operator()<idx::ArrayIndex>();
1747 else if (s.method.text == C.pool("rmi"))
1748 set_index.operator()<idx::RecursiveModelIndex>();
1749 break;
1750 default:
1751 M_unreachable("invalid token type");
1752 }
1753 if (not index) // No index was set
1754 return;
1755
1756 command_ = std::make_unique<CreateIndex>(std::move(index), std::move(table_name), std::move(attribute_name),
1757 std::move(index_name));
1758}
1759
1760void Sema::operator()(DropIndexStmt &s)
1761{
1762 RequireContext RCtx(this, s);
1763 Catalog &C = Catalog::Get();
1764
1765 if (not C.has_database_in_use()) {
1766 diag.err() << "No database selected.\n";
1767 return;
1768 }
1769 auto &DB = C.get_database_in_use();
1770
1771 bool ok = true;
1772 std::vector<ThreadSafePooledString> index_names;
1773 for (auto &tok : s.index_names) {
1774 auto index_name = tok->text.assert_not_none();
1775 if (DB.has_index(index_name))
1776 index_names.emplace_back(index_name);
1777 else {
1778 if (not s.has_if_exists) {
1779 diag.e(tok->pos) << "Index " << index_name << " does not exist in database " << DB.name << ".\n";
1780 ok = false;
1781 } else {
1782 diag.w(tok->pos) << "Index " << index_name << " does not exist in database " << DB.name << ". "
1783 << "Skipping.\n";
1784 }
1785 }
1786 }
1787 if (ok)
1788 command_ = std::make_unique<DropIndex>(std::move(index_names));
1789}
1790
1791void Sema::operator()(SelectStmt &s)
1792{
1793 RequireContext RCtx(this, s);
1794 Catalog &C = Catalog::Get();
1795
1796 if (s.from) {
1797 if (not C.has_database_in_use()) {
1798 diag.err() << "No database selected.\n";
1799 return;
1800 }
1801 (*this)(*s.from);
1802 }
1803 if (s.where) (*this)(*s.where);
1804 if (s.group_by) (*this)(*s.group_by);
1805 if (s.having) (*this)(*s.having);
1806 (*this)(*s.select);
1807 if (s.order_by) (*this)(*s.order_by);
1808 if (s.limit) (*this)(*s.limit);
1809
1810
1811 if (not is_nested() and not diag.num_errors())
1812 command_ = std::make_unique<QueryDatabase>();
1813}
1814
/**
 * Semantic analysis of `INSERT`.
 *
 * Looks up the target table in the database in use and checks every tuple of the statement against the attributes of
 * that table: the number of values must equal the number of attributes, every expression value must belong to the same
 * kind of type as its attribute, and NULL must not be assigned to a NOT NULL attribute.  An `InsertRecords` command is
 * created only if the statement is not nested and no errors have been reported.
 *
 * NOTE(review): the embedded listing number 1873 is skipped; presumably that dropped line is the `case` label of the
 * third kind of value (the one the "has default?" TODO belongs to) -- confirm against the real file.
 */
1815void Sema::operator()(InsertStmt &s)
1816{
1817 RequireContext RCtx(this, s);
1818 Catalog &C = Catalog::Get();
1819
1820 if (not C.has_database_in_use()) {
1821 diag.e(s.table_name.pos) << "No database in use.\n";
1822 return;
1823 }
1824 auto &DB = C.get_database_in_use();
1825
1826 const Table *tbl;
1827 try {
1828 tbl = &DB.get_table(s.table_name.text.assert_not_none());
1829 } catch (std::out_of_range) {
1830 diag.e(s.table_name.pos) << "Table " << s.table_name.text << " does not exist in database " << DB.name << ".\n";
1831 return;
1832 }
1833
1834 /* Analyze values. */
1835 for (std::size_t i = 0; i != s.tuples.size(); ++i) {
1836 auto &t = s.tuples[i];
1837 if (t.empty())
1838 continue; // syntax error, already reported
 /* NOTE(review): the condition is `!=`, so this message is also emitted for a tuple with *too many* values. */
1839 if (t.size() != tbl->num_attrs()) {
1840 diag.e(s.table_name.pos) << "Tuple " << (i + 1) << " has not enough values.\n";
1841 continue;
1842 }
 /* Walk the attributes of the table and the values of the tuple in lockstep. */
1843 for (auto [it, j] = std::tuple{tbl->begin(), 0}; it != tbl->end(); ++it, ++j) {
1844 auto &v = t[j];
1845 auto &attr = *it;
1846 switch (v.first) {
1847 case InsertStmt::I_Expr: {
1848 (*this)(*v.second);
 /* This `continue` advances the enclosing `for` loop to the next attribute. */
1849 if (v.second->type()->is_error()) continue;
1850 auto ty = as<const PrimitiveType>(v.second->type());
 /* The value is accepted if value type and attribute type are of the same kind. */
1851 if (ty->is_boolean() and attr.type->is_boolean())
1852 break;
1853 if (ty->is_character_sequence() and attr.type->is_character_sequence())
1854 break;
1855 if (ty->is_date() and attr.type->is_date())
1856 break;
1857 if (ty->is_date_time() and attr.type->is_date_time())
1858 break;
1859 if (ty->is_numeric() and attr.type->is_numeric())
1860 break;
1861 diag.e(s.table_name.pos) << "Value " << *v.second << " is not valid for attribute "
1862 << attr.name << ".\n";
1863 break;
1864 }
1865
1866 case InsertStmt::I_Null: {
1867 if (attr.not_nullable)
1868 diag.e(s.table_name.pos) << "Value NULL is not valid for attribute " << attr.name
1869 << " declared as NOT NULL.\n";
1870 break;
1871 }
1872
1874 /* TODO has default? */
1875 break;
1876 }
1877 }
1878 }
1879
1880 if (not is_nested() and not diag.num_errors())
1881 command_ = std::make_unique<InsertRecords>();
1882}
1883
1884void Sema::operator()(UpdateStmt &s)
1885{
1886 RequireContext RCtx(this, s);
1887 /* TODO */
1888 (void) s;
1889 M_unreachable("Not implemented.");
1890}
1891
1892void Sema::operator()(DeleteStmt &s)
1893{
1894 RequireContext RCtx(this, s);
1895 /* TODO */
1896 (void) s;
1897 M_unreachable("Not implemented.");
1898}
1899
/**
 * Semantic analysis of a DSV import statement.
 *
 * Looks up the target table, fills the reader configuration `cfg` from the options of the statement, validates the
 * single-character options, and creates an `ImportDSV` command if no errors have been reported.
 *
 * NOTE(review): the embedded listing numbers 1918 and 1935 are skipped.  Presumably the first dropped line declares
 * `cfg` and the second one is a further `SET_CHAR(...)` invocation -- confirm against the real file.
 */
1900void Sema::operator()(DSVImportStmt &s)
1901{
1902 RequireContext RCtx(this, s);
1903 auto &C = Catalog::Get();
1904
1905 if (not C.has_database_in_use()) {
1906 diag.e(s.table_name.pos) << "No database selected\n";
1907 return;
1908 }
1909 auto &DB = C.get_database_in_use();
1910
 /* A failed lookup leaves `table` null and does not return; the remaining options are still validated. */
1911 const Table *table = nullptr;
1912 try {
1913 table = &DB.get_table(s.table_name.text.assert_not_none());
1914 } catch (std::out_of_range) {
1915 diag.e(s.table_name.pos) << "Table " << s.table_name.text << " does not exist in database " << DB.name << ".\n";
1916 }
1917
1919 cfg.has_header = s.has_header;
1920 cfg.skip_header = s.skip_header;
1921 if (s.rows)
1922 cfg.num_rows = atoi(*s.rows.text);
1923
1924 /* If character was provided by user, check that length is equal to 1. */
 /* SET_CHAR(NAME) runs the text of option NAME through `interpret()` and stores its single character in `cfg.NAME`. */
1925#define SET_CHAR(NAME) \
1926 if (s.NAME) { \
1927 std::string NAME = interpret(*s.NAME.text); \
1928 if (NAME.length() == 1) \
1929 cfg.NAME = NAME[0]; \
1930 else \
1931 diag.e(s.NAME.pos) << "Invalid " #NAME " character " << s.NAME.text << ". Must have length 1.\n"; \
1932 }
1933 SET_CHAR(delimiter);
1934 SET_CHAR(quote);
1936#undef SET_CHAR
1937
1938 /* Delimiter and quote character must be distinct. */
1939 if (cfg.delimiter == cfg.quote) {
1940 auto pos = s.delimiter ? s.delimiter.pos : s.quote.pos;
1941 diag.e(pos) << "The delimiter (" << cfg.delimiter << ") must differ from the quote character (" << cfg.quote
1942 << ").\n";
1943 }
1944
1945 /* Sanity check for skip header. */
1946 if (cfg.skip_header and not cfg.has_header) {
1947 if (not Options::Get().quiet)
1948 diag.n(s.path.pos) << "I will assume the existence of a header so I can skip it.\n";
1949 }
1950
1951 /* Get filesystem path from path token by removing surrounding quotation marks. */
1952 std::filesystem::path path(std::string(*s.path.text, 1, strlen(*s.path.text) - 2));
1953
 /* NOTE(review): `*table` is null if the lookup above failed; this relies on `diag.e()` having been counted by
  * `diag.num_errors()` -- confirm. */
1954 if (not diag.num_errors())
1955 command_ = std::make_unique<ImportDSV>(*table, path, std::move(cfg));
1956}
#define q(X)
#define SET_CHAR(NAME)
#define M_unreachable(MSG)
Definition: macro.hpp:146
#define M_insist(...)
Definition: macro.hpp:129
::wasm::Expression * expr()
Moves the underlying Binaryen ::wasm::Expression out of this.
Definition: WasmDSL.hpp:1558
Bool< L > value
Definition: WasmUtil.hpp:1317
Bool< L > uint8_t n
Definition: WasmUtil.hpp:1318
for(std::size_t idx=1;idx< num_vectors;++idx) res.emplace((vectors_[idx].bitmask()<< uint32_t(idx *vector_type return * res
Definition: WasmDSL.hpp:3696
std::string unique(std::string prefix, unsigned &counter)
Creates a unique name from a given prefix and a counter.
Definition: WasmDSL.hpp:433
‍mutable namespace
Definition: Backend.hpp:10
std::string escape(char c)
Definition: fn.hpp:292
bool M_EXPORT is_comparable(const Type *first, const Type *second)
Returns true iff both types have the same PrimitiveType, i.e.
Definition: Type.hpp:547
const Numeric * arithmetic_join(const Numeric *lhs, const Numeric *rhs)
Definition: Type.cpp:24
std::string quote(const std::string &str)
Definition: fn.hpp:306
T(x)
ThreadSafeStringPool::proxy_type ThreadSafePooledString
Definition: Pool.hpp:464
and
Definition: enum_ops.hpp:12
and arithmetic< U > and same_signedness< T, U > U
Definition: concepts.hpp:90
ThreadSafeStringPool::proxy_optional_type ThreadSafePooledOptionalString
Definition: Pool.hpp:465
std::string interpret(const std::string &str, char esc='\\', char quote='"')
Definition: fn.hpp:319
auto visit(Callable &&callable, Base &obj, m::tag< Callable > &&=m::tag< Callable >())
Generic implementation to visit a class hierarchy, with similar syntax as std::visit.
Definition: Visitor.hpp:138
An attribute of a table.
Definition: Schema.hpp:289
const PrimitiveType * type
the type of the attribute
Definition: Schema.hpp:294
The boolean type.
Definition: Type.hpp:230
The catalog contains all Databases and keeps track of all meta information of the database system.
Definition: Catalog.hpp:215
Database & get_database_in_use()
Returns a reference to the Database that is currently in use, if any.
Definition: Catalog.hpp:295
bool has_database_in_use() const
Returns true if any Database is currently in use.
Definition: Catalog.hpp:293
ThreadSafePooledString pool(const char *str) const
Creates an internalized copy of the string str by adding it to the internal StringPool.
Definition: Catalog.hpp:274
static Catalog & Get()
Return a reference to the single Catalog instance.
bool has_database(const ThreadSafePooledString &name) const
Returns true iff a Database with the given name exists.
Definition: Catalog.hpp:285
std::unique_ptr< DatabaseInstruction > create_instruction(const ThreadSafePooledString &name, const std::vector< std::string > &args) const
Returns a std::unique_ptr to a DatabaseInstruction with the given name, created with the given args.
Definition: Catalog.hpp:528
const Function * get_function(const ThreadSafePooledString &name) const
Returns a reference to the Function with the given name.
Definition: Catalog.hpp:310
std::unique_ptr< TableFactory > table_factory(std::unique_ptr< TableFactory > table_factory)
Replaces the stored TableFactory with table_factory and returns the old TableFactory.
Definition: Catalog.hpp:572
The type of character strings, both fixed length and varying length.
Definition: Type.hpp:290
Configuration parameters for importing a DSV file.
Definition: Reader.hpp:45
char delimiter
‍the delimiter separating cells
Definition: Reader.hpp:47
char quote
‍the quotation mark for strings
Definition: Reader.hpp:49
bool skip_header
‍whether to ignore the headline (requires has_header = true)
Definition: Reader.hpp:55
bool has_header
‍whether the first line of the file is a headline describing the columns
Definition: Reader.hpp:53
std::size_t num_rows
‍the maximum number of rows to read from the file (may exceed actual number of rows)
Definition: Reader.hpp:57
ThreadSafePooledString name
the name of the database
Definition: Schema.hpp:892
The date type.
Definition: Type.hpp:364
The date type.
Definition: Type.hpp:335
std::ostream & e(const Position pos)
Definition: Diagnostic.hpp:41
std::ostream & n(const Position pos)
Definition: Diagnostic.hpp:31
unsigned num_errors() const
Returns the number of errors emitted since the last call to clear().
Definition: Diagnostic.hpp:48
std::ostream & w(const Position pos)
Definition: Diagnostic.hpp:36
std::ostream & err()
Definition: Diagnostic.hpp:53
fnid_t fnid
the function id
Definition: Schema.hpp:841
bool is_aggregate() const
Returns true iff this function is an aggregation, i.e. if it is evaluated on all tuples.
Definition: Schema.hpp:852
The numeric type represents integer and floating-point types of different precision and scale.
Definition: Type.hpp:393
static constexpr std::size_t MAX_DECIMAL_PRECISION
The maximal number of decimal digits that can be accurately represented by DECIMAL(p,...
Definition: Type.hpp:397
unsigned scale
the number of decimal digits right of the decimal point
Definition: Type.hpp:415
virtual const PrimitiveType * as_scalar() const override
Convert this PrimitiveType to its scalar equivalent.
static Options & Get()
Return a reference to the single Options instance.
Definition: Options.cpp:9
A data type representing a pooled (or internalized) object.
Definition: Pool.hpp:168
Pooled< T, Pool, false > assert_not_none() const
Definition: Pool.hpp:239
const char * name
Definition: Position.hpp:13
PrimitiveTypes represent Types of values.
Definition: Type.hpp:159
bool is_scalar() const
Returns true iff this PrimitiveType is scalar, i.e. if it is for a single value.
Definition: Type.hpp:168
virtual const PrimitiveType * as_vectorial() const =0
Convert this PrimitiveType to its vectorial equivalent.
category_t category
whether this type is scalar or vector
Definition: Type.hpp:160
bool is_vectorial() const
Returns true iff this PrimitiveType is vectorial, i.e. if it is for a sequence of values.
Definition: Type.hpp:170
A table is a sorted set of attributes.
Definition: Schema.hpp:388
virtual iterator end() const =0
virtual Attribute & at(std::size_t id)=0
Returns the attribute with the given id.
virtual std::size_t num_attrs() const =0
Returns the number of attributes in this table.
virtual iterator begin() const =0
This class represents types in the SQL type system.
Definition: Type.hpp:46
static Pooled< CharacterSequence > Get_Char(category_t category, std::size_t length)
Returns a CharacterSequence type of the given category and fixed length.
Definition: Type.cpp:75
static Pooled< NoneType > Get_None()
Returns a NoneType.
Definition: Type.cpp:66
static Pooled< Numeric > Get_Double(category_t category)
Returns a Numeric type of given category for 64 bit floating-points.
Definition: Type.cpp:104
static M_LCOV_EXCL_STOP Pooled< ErrorType > Get_Error()
Returns a ErrorType.
Definition: Type.cpp:64
bool is_primitive() const
Returns true iff this Type is a PrimitiveType.
Definition: Type.hpp:74
static Pooled< Numeric > Get_Decimal(category_t category, unsigned digits, unsigned scale)
Returns a Numeric type for decimals of given category, decimal digits, and scale.
Definition: Type.cpp:89
static Pooled< Date > Get_Date(category_t category)
Returns a Date type of the given category.
Definition: Type.cpp:85
static Pooled< Boolean > Get_Boolean(category_t category)
Returns a Boolean type of the given category.
Definition: Type.cpp:68
static Pooled< DateTime > Get_Datetime(category_t category)
Returns a DateTime type of the given category.
Definition: Type.cpp:87
bool is_error() const
Definition: Type.hpp:71
static Pooled< Numeric > Get_Integer(category_t category, unsigned num_bytes)
Returns a Numeric type for integrals of given category and num_bytes bytes.
Definition: Type.cpp:94
static Pooled< FnType > Get_Function(const Type *return_type, std::vector< const Type * > parameter_types)
Returns a FnType for a function with parameter types parameter_types and return type return_type.
Definition: Type.cpp:109
A binary expression.
Definition: AST.hpp:348
std::unique_ptr< Expr > lhs
Definition: AST.hpp:349
const Numeric * common_operand_type
Definition: AST.hpp:351
std::unique_ptr< Expr > rhs
Definition: AST.hpp:350
Token op() const
Definition: AST.hpp:377
Token tok
Definition: AST.hpp:500
A constant: a string literal or a numeric constant.
Definition: AST.hpp:213
std::vector< std::unique_ptr< Expr > > key_fields
Definition: AST.hpp:904
std::vector< std::unique_ptr< attribute_definition > > attributes
Definition: AST.hpp:872
An import statement for a delimiter separated values (DSV) file.
Definition: AST.hpp:1027
A SQL delete statement.
Definition: AST.hpp:1005
A designator.
Definition: AST.hpp:134
Token attr_name
Definition: AST.hpp:139
std::variant< std::monostate, const Expr *, const Attribute * > target_type
Definition: AST.hpp:137
std::vector< std::unique_ptr< Token > > index_names
Definition: AST.hpp:922
std::vector< std::unique_ptr< Token > > table_names
Definition: AST.hpp:885
The error expression.
Definition: AST.hpp:116
The error statement.
Definition: AST.hpp:803
An expression.
Definition: AST.hpp:39
const Type * type_
‍the type of an expression, determined by the semantic analysis
Definition: AST.hpp:47
const Type * type() const
Returns the Type of this Expr.
Definition: AST.hpp:58
Token tok
the token of the expression; serves as an anchor to locate the expression in the source
Definition: AST.hpp:43
A function application.
Definition: AST.hpp:246
std::vector< std::unique_ptr< Expr > > args
Definition: AST.hpp:250
std::unique_ptr< Expr > fn
Definition: AST.hpp:249
const Function * func_
Definition: AST.hpp:252
std::vector< from_type > from
Definition: AST.hpp:563
std::vector< group_type > group_by
Definition: AST.hpp:588
std::unique_ptr< Expr > having
Definition: AST.hpp:601
Token table_name
Definition: AST.hpp:1021
A SQL insert statement.
Definition: AST.hpp:967
Token table_name
Definition: AST.hpp:972
std::vector< tuple_t > tuples
Definition: AST.hpp:973
Token tok
‍the token of the Instruction; starts with \
Definition: AST.hpp:771
ThreadSafePooledString name
‍the name of the Instruction (without leading \‍)
Definition: AST.hpp:773
std::vector< std::string > args
‍the arguments to the Instruction; may be empty
Definition: AST.hpp:775
std::vector< order_type > order_by
‍true means ascending, false means descending
Definition: AST.hpp:616
A query expression for nested queries.
Definition: AST.hpp:389
std::unique_ptr< Stmt > query
Definition: AST.hpp:390
const ThreadSafePooledString & alias() const
Definition: AST.hpp:421
std::vector< select_type > select
‍list of selected elements; expr AS name
Definition: AST.hpp:528
std::vector< std::unique_ptr< Expr > > expanded_select_all
Definition: AST.hpp:530
A SQL select statement.
Definition: AST.hpp:936
std::unique_ptr< Clause > from
Definition: AST.hpp:938
std::unique_ptr< Clause > select
Definition: AST.hpp:937
std::unique_ptr< Clause > where
Definition: AST.hpp:939
std::unique_ptr< Clause > order_by
Definition: AST.hpp:942
std::unique_ptr< Clause > having
Definition: AST.hpp:941
std::unique_ptr< Clause > group_by
Definition: AST.hpp:940
std::unique_ptr< Clause > limit
Definition: AST.hpp:943
source_table sources
‍list of all sources along with their order
Definition: Sema.hpp:63
std::unordered_multimap< ThreadSafePooledString, std::pair< std::reference_wrapper< Expr >, unsigned > > named_expr_table
‍list of all computed expressions along with their order
Definition: Sema.hpp:57
std::variant< std::monostate, std::reference_wrapper< const Table >, named_expr_table > source_type
‍the type of a source of data: either a database table or a nested query with named results
Definition: Sema.hpp:59
enum m::ast::Sema::SemaContext::stage_t stage
current stage
std::ostringstream oss
‍used to create textual representation of complex AST objects, e.g. expressions
Definition: Sema.hpp:103
Diagnostic & diag
Definition: Sema.hpp:97
ThreadSafePooledOptionalString make_unique_id_from_binding_path(context_stack_t::reverse_iterator current_ctx, context_stack_t::reverse_iterator binding_ctx)
Creates a unique ID from a sequence of SemaContexts by concatenating their aliases.
Definition: Sema.cpp:72
context_stack_t contexts_
Definition: Sema.hpp:101
void replace_by_fresh_designator_to(std::unique_ptr< Expr > &to_replace, const Expr &target)
Replaces to_replace by a fresh Designator, that has the same syntactical representation as to_replace...
Definition: Sema.cpp:61
SemaContext pop_context()
Definition: Sema.hpp:129
SemaContext & get_context()
Definition: Sema.hpp:135
std::unique_ptr< DatabaseCommand > command_
‍the command to execute when semantic analysis completes without errors
Definition: Sema.hpp:105
bool is_nested() const
Returns true iff the current statement, that is being analyzed, is a nested statement.
Definition: Sema.cpp:23
std::unique_ptr< Designator > create_designator(ThreadSafePooledString name, Token tok, const Expr &target)
Creates a fresh Designator with the given name at location tok and with target target.
Definition: Sema.cpp:32
void compose_of(std::unique_ptr< ast::Expr > &ptr, const std::vector< std::reference_wrapper< ast::Expr > > components)
Recursively analyzes the ast::Expr referenced by ptr and replaces subexpressions that can be composed...
Definition: Sema.cpp:115
std::unique_ptr< DatabaseCommand > analyze(std::unique_ptr< ast::Command > ast)
Perform semantic analysis of an ast::Command.
Definition: Sema.cpp:15
bool is_composable_of(const ast::Expr &expr, const std::vector< std::reference_wrapper< ast::Expr > > components)
Computes whether the bound parts of expr are composable of elements in components.
Definition: Sema.cpp:88
TokenType type
Definition: Token.hpp:17
ThreadSafePooledOptionalString text
declared as optional for dummy tokens
Definition: Token.hpp:16
static Token CreateArtificial(TokenType type=TK_EOF)
Definition: Token.hpp:29
Position pos
Definition: Token.hpp:15
A unary expression: "+e", "-e", "~e", "NOT e".
Definition: AST.hpp:324
Token op() const
Definition: AST.hpp:336
std::unique_ptr< Expr > expr
Definition: AST.hpp:325
A SQL update statement.
Definition: AST.hpp:986
std::unique_ptr< Expr > where
Definition: AST.hpp:574
A simple index based on a sorted array that maps keys to their tuple_id.
Definition: Index.hpp:53
A recursive model index with two layers consisting only of linear models that maps keys to their tuple...
Definition: Index.hpp:137
Definition: tag.hpp:8