mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
Parser.cpp
Go to the documentation of this file.
1#include "parse/Parser.hpp"
2
3#include <cerrno>
4#include <cstdlib>
5#include <initializer_list>
8#include <utility>
9
10
11using namespace m;
12using namespace m::ast;
13
14
15namespace {
16
18int get_precedence(const TokenType tt)
19{
20 int p = 0;
21 /* List all binary operators. The higher up an operator is in the switch statement, the higher its precedence. */
22 switch (tt) {
23 default: return -1;
24 /* bitwise NOT */
25 case TK_TILDE: ++p;
26 /* multiplicative */
27 case TK_ASTERISK:
28 case TK_SLASH:
29 case TK_PERCENT: ++p;
30 /* additive */
31 case TK_PLUS:
32 case TK_MINUS: ++p;
33 /* string concat */
34 case TK_DOTDOT: ++p;
35 /* comparison */
36 case TK_LESS:
37 case TK_GREATER:
38 case TK_LESS_EQUAL:
39 case TK_GREATER_EQUAL:
40 case TK_EQUAL:
41 case TK_BANG_EQUAL:
42 case TK_Like: ++p;
43 /* logical NOT */
44 case TK_Not: ++p;
45 /* logical AND */
46 case TK_And: ++p;
47 /* logical OR */
48 case TK_Or: ++p;
49 }
50 return p;
51}
52
54bool is_integer(TokenType tt)
55{
56 switch (tt) {
57 case TK_OCT_INT:
58 case TK_DEC_INT:
59 case TK_HEX_INT:
60 return true;
61
62 default:
63 return false;
64 }
65}
66
67}
68
69
70/*======================================================================================================================
71 * Follow sets
72 *====================================================================================================================*/
73
74namespace {
75
76constexpr Parser::follow_set_t make_follow_set(std::initializer_list<TokenType> tokens)
77{
79 for (TokenType tk : tokens) {
80 M_insist(tk < TokenType::TokenType_MAX);
81 F[tk] = true;
82 }
83 return F;
84}
85
86#define M_FOLLOW(NAME, SET) \
87const Parser::follow_set_t follow_set_##NAME = make_follow_set SET ;
88#include "tables/FollowSet.tbl"
89#undef M_FOLLOW
90
91}
92
93
94/*======================================================================================================================
95 * Parser
96 *====================================================================================================================*/
97
98std::unique_ptr<Command> Parser::parse()
99{
100 if (token().type == TK_INSTRUCTION)
101 return parse_Instruction();
102 else
103 return parse_Stmt();
104}
105
106std::unique_ptr<Instruction> Parser::parse_Instruction()
107{
108 auto &C = Catalog::Get();
109 M_insist(is(TK_INSTRUCTION));
110
111 Token instr = consume();
112 std::string_view sv(*(instr.text));
113 const char *delimiter = " \n";
114
115 /*----- Isolate the instruction's name -----*/
116 std::string::size_type end = sv.find_first_of(delimiter);
117 ThreadSafePooledString instruction_name = C.pool(sv.substr(1, end - 1)); // skip leading `\`
118
119 /*----- Separate the arguments. -----*/
120 std::vector<std::string> args;
121 for (;;) {
122 std::string::size_type start = sv.find_first_not_of(delimiter, end);
123 if (start == std::string::npos)
124 break;
125 end = sv.find_first_of(delimiter, start);
126 args.emplace_back(sv.substr(start, end - start));
127 }
128
129 expect(TK_SEMICOL);
130 return std::make_unique<Instruction>(std::move(instr), std::move(instruction_name), std::move(args));
131}
132
133std::unique_ptr<Stmt> Parser::parse_Stmt()
134{
135 Token start = token();
136 std::unique_ptr<Stmt> stmt = nullptr;
137 switch (token().type) {
138 default:
139 stmt = std::make_unique<ErrorStmt>(token());
140 diag.e(token().pos) << "expected a statement, got " << token().text << '\n';
141 consume();
142 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
143
144 case TK_SEMICOL: return std::make_unique<EmptyStmt>(consume());
145
146 case TK_Create:
147 switch (token<1>().type) {
148 default:
149 stmt = std::make_unique<ErrorStmt>(token());
150 diag.e(token<1>().pos) << "expected a create database statement or a create table statement, got "
151 << token<1>().text << '\n';
152 consume();
153 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
154
155 case TK_Database: stmt = parse_CreateDatabaseStmt(); break;
156 case TK_Table: stmt = parse_CreateTableStmt(); break;
157 case TK_Unique:
158 case TK_Index: stmt = parse_CreateIndexStmt(); break;
159 }
160 break;
161
162 case TK_Drop:
163 switch (token<1>().type) {
164 default:
165 stmt = std::make_unique<ErrorStmt>(token());
166 diag.e(token().pos) << "expected a drop database, table, or index statement, got "
167 << token().text << '\n';
168 consume();
169 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
170
171 case TK_Database: stmt = parse_DropDatabaseStmt(); break;
172 case TK_Table: stmt = parse_DropTableStmt(); break;
173 case TK_Index: stmt = parse_DropIndexStmt(); break;
174 }
175 break;
176
177 case TK_Use: stmt = parse_UseDatabaseStmt(); break;
178 case TK_Select: stmt = parse_SelectStmt(); break;
179 case TK_Insert: stmt = parse_InsertStmt(); break;
180 case TK_Update: stmt = parse_UpdateStmt(); break;
181 case TK_Delete: stmt = parse_DeleteStmt(); break;
182 case TK_Import: stmt = parse_ImportStmt(); break;
183 }
184 expect(TK_SEMICOL);
185 return stmt;
186}
187
188/*======================================================================================================================
189 * statements
190 *====================================================================================================================*/
191
192std::unique_ptr<Stmt> Parser::parse_CreateDatabaseStmt()
193{
194 Token start = token();
195
196 /* 'CREATE' 'DATABASE' identifier */
197 if (not expect(TK_Create)) {
198 consume();
199 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
200 }
201
202 if (not expect(TK_Database))
203 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
204
205 Token database_name = token();
206 if (not expect(TK_IDENTIFIER))
207 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
208
209 return std::make_unique<CreateDatabaseStmt>(std::move(database_name));
210}
211
212std::unique_ptr<Stmt> Parser::parse_DropDatabaseStmt()
213{
214 Token start = token();
215
216 /* 'DROP' 'DATABASE' */
217 if (not expect(TK_Drop)) {
218 consume();
219 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
220 }
221
222 if (not expect(TK_Database))
223 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
224
225 /* [ 'IF' 'EXISTS' ] */
226 bool has_if_exists = false;
227 if (accept(TK_If)) {
228 if (not expect(TK_Exists))
229 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
230 has_if_exists = true;
231 }
232
233 /* identifier */
234 Token database_name = token();
235 if (not expect(TK_IDENTIFIER))
236 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
237
238 return std::make_unique<DropDatabaseStmt>(std::move(database_name), has_if_exists);
239}
240
241std::unique_ptr<Stmt> Parser::parse_UseDatabaseStmt()
242{
243 Token start = token();
244
245 /* 'USE' identifier */
246 if (not expect(TK_Use)) {
247 consume();
248 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
249 }
250
251 Token database_name = token();
252 if (not expect(TK_IDENTIFIER))
253 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
254
255 return std::make_unique<UseDatabaseStmt>(std::move(database_name));
256}
257
258std::unique_ptr<Stmt> Parser::parse_CreateTableStmt()
259{
260 Token start = token();
261 std::vector<std::unique_ptr<CreateTableStmt::attribute_definition>> attrs;
262
263 /* 'CREATE' 'TABLE' identifier '(' */
264 if (not expect(TK_Create)) {
265 consume();
266 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
267 }
268
269 if (not expect(TK_Table))
270 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
271
272 Token table_name = token();
273 if (not expect(TK_IDENTIFIER))
274 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
275
276 if (not expect(TK_LPAR))
277 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
278
279 /* identifier data-type { constraint } [ ',' identifier data-type { constraint } ] */
280 do {
281 Token id = token();
282 if (not expect(TK_IDENTIFIER))
283 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
284
285 const Type *type = M_notnull(parse_data_type());
286
287 /* Parse the list of constraints. */
288 std::vector<std::unique_ptr<Constraint>> constraints;
289 for (;;) {
290 switch (token().type) {
291 /* 'PRIMARY' 'KEY' */
292 case TK_Primary: {
293 Token tok = consume();
294 if (not expect(TK_Key)) goto constraint_error_recovery;
295 constraints.push_back(std::make_unique<PrimaryKeyConstraint>(std::move(tok)));
296 break;
297 }
298
299 /* 'NOT' 'NULL' */
300 case TK_Not: {
301 Token tok = consume();
302 if (not expect(TK_Null)) goto constraint_error_recovery;
303 constraints.push_back(std::make_unique<NotNullConstraint>(std::move(tok)));
304 break;
305 }
306
307 /* 'UNIQUE' */
308 case TK_Unique: {
309 Token tok = consume();
310 constraints.push_back(std::make_unique<UniqueConstraint>(std::move(tok)));
311 break;
312 }
313
314 /* 'CHECK' '(' expression ')' */
315 case TK_Check: {
316 Token tok = consume();
317 if (not expect(TK_LPAR)) goto constraint_error_recovery;
318 std::unique_ptr<Expr> cond = parse_Expr();
319 if (not expect(TK_RPAR)) goto constraint_error_recovery;
320 constraints.push_back(std::make_unique<CheckConditionConstraint>(std::move(tok), std::move(cond)));
321 break;
322 }
323
324 /* 'REFERENCES' identifier '(' identifier ')' */
325 case TK_References: {
326 Token tok = consume();
327 Token ref_table_name = token();
328 if (not expect(TK_IDENTIFIER)) goto constraint_error_recovery;
329 if (not expect(TK_LPAR)) goto constraint_error_recovery;
330 Token attr_name = token();
331 if (not expect(TK_IDENTIFIER)) goto constraint_error_recovery;
332 if (not expect(TK_RPAR)) goto constraint_error_recovery;
333 constraints.push_back(std::make_unique<ReferenceConstraint>(
334 std::move(tok),
335 std::move(ref_table_name),
336 std::move(attr_name),
338 );
339 break;
340 }
341
342 default:
343 goto exit_constraints;
344 }
345
346 }
347constraint_error_recovery:
348 recover(follow_set_CONSTRAINT);
349exit_constraints:
350 attrs.push_back(std::make_unique<CreateTableStmt::attribute_definition>(std::move(id),
351 std::move(type),
352 std::move(constraints)));
353 } while (accept(TK_COMMA));
354
355 /* ')' */
356 if (not expect(TK_RPAR))
357 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
358
359 return std::make_unique<CreateTableStmt>(std::move(table_name), std::move(attrs));
360}
361
362std::unique_ptr<Stmt> Parser::parse_DropTableStmt()
363{
364 Token start = token();
365
366 /* 'DROP' 'TABLE' */
367 if (not expect(TK_Drop)) {
368 consume();
369 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
370 }
371
372 if (not expect(TK_Table))
373 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
374
375 /* [ 'IF' 'EXISTS' ] */
376 bool has_if_exists = false;
377 if (accept(TK_If)) {
378 if (not expect(TK_Exists))
379 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
380 has_if_exists = true;
381 }
382
383 /* identifier { ',' identifier } */
384 std::vector<std::unique_ptr<Token>> table_names;
385 do {
386 Token table_name = token();
387 if (not expect(TK_IDENTIFIER))
388 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
389 table_names.emplace_back(std::make_unique<Token>(std::move(table_name)));
390 } while (accept(TK_COMMA));
391
392 return std::make_unique<DropTableStmt>(std::move(table_names), has_if_exists);
393}
394
395std::unique_ptr<Stmt> Parser::parse_CreateIndexStmt()
396{
397 Token start = token();
398
399 /* 'CREATE' [ 'UNIQUE' ] 'INDEX' */
400 if (not expect(TK_Create))
401 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
402
403 Token has_unique = Token::CreateArtificial();
404 if (token() == TK_Unique)
405 has_unique = consume();
406
407 if (not expect(TK_Index))
408 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
409
410 /* [ [ 'IF' 'NOT' 'EXISTS' ] identifier ] */
411 bool has_if_not_exists = false;
412 Token index_name = Token::CreateArtificial();
413 if (accept(TK_If)) {
414 if (not expect(TK_Not))
415 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
416 if (not expect(TK_Exists))
417 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
418 has_if_not_exists = true;
419 index_name = token();
420 if (not expect(TK_IDENTIFIER))
421 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
422 } else if (token().type == TK_IDENTIFIER)
423 index_name = consume();
424
425 /* 'ON' identifier */
426 if (not expect(TK_On))
427 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
428
429 Token table_name = token();
430 if (not expect(TK_IDENTIFIER))
431 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
432
433 /* [ 'USING' identifier */
435 if (accept(TK_Using)) {
436 if (token().type != TK_IDENTIFIER and token().type != TK_Default) {
437 diag.e(token().pos) << "expected an identifier or DEFAULT, got " << token().text << '\n';
438 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
439 }
440 method = consume();
441 }
442
443 /* '(' key_field { ',' key_field } ')' */
444 if (not expect(TK_LPAR))
445 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
446 std::vector<std::unique_ptr<Expr>> key_fields;
447 do {
448 switch (token().type) {
449 /* identifier */
450 case TK_IDENTIFIER: {
451 auto id = std::make_unique<Designator>(consume());
452 key_fields.emplace_back(std::move(id));
453 break;
454 }
455 /* '(' expression ')' */
456 case TK_LPAR: {
457 auto expr = parse_Expr();
458 key_fields.emplace_back(std::move(expr));
459 break;
460 }
461 default: {
462 diag.e(token().pos) << "expected an identifier or expression, got " << token().text << '\n';
463 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
464 }
465 }
466 } while(accept(TK_COMMA));
467 if (not expect(TK_RPAR))
468 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
469
470 return std::make_unique<CreateIndexStmt>(
471 /* has_unique= */ std::move(has_unique),
472 /* has_if_not_exists= */ has_if_not_exists,
473 /* index_name= */ std::move(index_name),
474 /* table_name= */ std::move(table_name),
475 /* method= */ std::move(method),
476 /* key_fields= */ std::move(key_fields)
477 );
478}
479
480std::unique_ptr<Stmt> Parser::parse_DropIndexStmt()
481{
482 Token start = token();
483
484 /* 'DROP' 'INDEX' */
485 if (not expect(TK_Drop)) {
486 consume();
487 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
488 }
489
490 if (not expect(TK_Index))
491 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
492
493 /* [ 'IF' 'EXISTS' ] */
494 bool has_if_exists = false;
495 if (accept(TK_If)) {
496 if (not expect(TK_Exists))
497 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
498 has_if_exists = true;
499 }
500
501 /* identifier { ',' identifier } */
502 std::vector<std::unique_ptr<Token>> index_names;
503 do {
504 Token index_name = token();
505 if (not expect(TK_IDENTIFIER))
506 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
507 index_names.emplace_back(std::make_unique<Token>(std::move(index_name)));
508 } while (accept(TK_COMMA));
509
510 return std::make_unique<DropIndexStmt>(std::move(index_names), has_if_exists);
511}
512
513std::unique_ptr<Stmt> Parser::parse_SelectStmt()
514{
515 std::unique_ptr<Clause> select = parse_SelectClause();
516 std::unique_ptr<Clause> from = nullptr;
517 std::unique_ptr<Clause> where = nullptr;
518 std::unique_ptr<Clause> group_by = nullptr;
519 std::unique_ptr<Clause> having = nullptr;
520 std::unique_ptr<Clause> order_by = nullptr;
521 std::unique_ptr<Clause> limit = nullptr;
522
523 if (token() == TK_From)
524 from = parse_FromClause();
525 if (token() == TK_Where)
526 where = parse_WhereClause();
527 if (token() == TK_Group)
528 group_by = parse_GroupByClause();
529 if (token() == TK_Having)
530 having = parse_HavingClause();
531 if (token() == TK_Order)
532 order_by = parse_OrderByClause();
533 if (token() == TK_Limit)
534 limit = parse_LimitClause();
535
536 return std::make_unique<SelectStmt>(std::move(select),
537 std::move(from),
538 std::move(where),
539 std::move(group_by),
540 std::move(having),
541 std::move(order_by),
542 std::move(limit));
543}
544
545std::unique_ptr<Stmt> Parser::parse_InsertStmt()
546{
547 Token start = token();
548
549 /* 'INSERT' 'INTO' identifier 'VALUES' */
550 if (not expect(TK_Insert)) {
551 consume();
552 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
553 }
554
555 if (not expect(TK_Into))
556 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
557
558 Token table_name = token();
559 if (not expect(TK_IDENTIFIER))
560 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
561
562 if (not expect(TK_Values))
563 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
564
565 /* tuple { ',' tuple } */
566 std::vector<InsertStmt::tuple_t> tuples;
567 do {
568 /* '(' ( 'DEFAULT' | 'NULL' | expression ) { ',' ( 'DEFAULT' | 'NULL' | expression ) } ')' */
570 if (not expect(TK_LPAR)) goto tuple_error_recovery;
571 do {
572 switch (token().type) {
573 case TK_Default:
574 consume();
575 tuple.emplace_back(InsertStmt::I_Default, nullptr);
576 break;
577
578 case TK_Null:
579 consume();
580 tuple.emplace_back(InsertStmt::I_Null, nullptr);
581 break;
582
583 default: {
584 auto e = parse_Expr();
585 tuple.emplace_back(InsertStmt::I_Expr, std::move(e));
586 break;
587 }
588 }
589 } while (accept(TK_COMMA));
590 if (not expect(TK_RPAR)) goto tuple_error_recovery;
591 tuples.emplace_back(std::move(tuple));
592 continue;
593tuple_error_recovery:
594 recover(follow_set_TUPLE);
595 } while (accept(TK_COMMA));
596
597 return std::make_unique<InsertStmt>(std::move(table_name), std::move(tuples));
598}
599
600std::unique_ptr<Stmt> Parser::parse_UpdateStmt()
601{
602 Token start = token();
603
604 /* 'UPDATE' identifier 'SET' */
605 if (not expect(TK_Update)) {
606 consume();
607 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
608 }
609
610 Token table_name = token();
611 if (not expect(TK_IDENTIFIER))
612 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
613
614 if (not expect(TK_Set))
615 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
616
617 /* identifier '=' expression { ',' identifier '=' expression } ; */
618 std::vector<UpdateStmt::set_type> set;
619 do {
620 auto id = token();
621 if (not expect(TK_IDENTIFIER))
622 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
623
624 if (not expect(TK_EQUAL))
625 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
626
627 auto e = parse_Expr();
628 set.emplace_back(id, std::move(e));
629 } while (accept(TK_COMMA));
630
631 /* [ where-clause ] */
632 std::unique_ptr<Clause> where = nullptr;
633 if (token() == TK_Where)
634 where = parse_WhereClause();
635
636 return std::make_unique<UpdateStmt>(std::move(table_name), std::move(set), std::move(where));
637}
638
639std::unique_ptr<Stmt> Parser::parse_DeleteStmt()
640{
641 Token start = token();
642
643 /* 'DELETE' 'FROM' identifier */
644 if (not expect(TK_Delete)) {
645 consume();
646 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
647 }
648
649 if (not expect(TK_From))
650 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
651
652 Token table_name = token();
653 if (not expect(TK_IDENTIFIER))
654 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
655
656 /*[ where-clause ] ; */
657 std::unique_ptr<Clause> where = nullptr;
658 if (token() == TK_Where)
659 where = parse_WhereClause();
660
661 return std::make_unique<DeleteStmt>(std::move(table_name), std::move(where));
662}
663
664std::unique_ptr<Stmt> Parser::parse_ImportStmt()
665{
666 Token start = token();
667
668 /* 'IMPORT' 'INTO' identifier */
669 if (not expect(TK_Import)) {
670 consume();
671 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
672 }
673
674 if (not expect(TK_Into))
675 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
676
677 Token table_name = token();
678 if (not expect(TK_IDENTIFIER))
679 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
680
681 switch (token().type) {
682 /* 'DSV' string-literal */
683 case TK_Dsv: {
684 consume();
685 DSVImportStmt stmt(std::move(table_name));
686 stmt.path = token();
687
688 if (not expect(TK_STRING_LITERAL))
689 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
690
691 /* [ 'ROWS' integer-constant ] */
692 if (accept(TK_Rows)) {
693 stmt.rows = token();
694 if (token() == TK_DEC_INT or token() == TK_OCT_INT) {
695 consume();
696 } else {
697 diag.e(token().pos) << "expected a decimal integer, got " << token().text << '\n';
698 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
699 }
700 }
701
702 /* [ 'DELIMITER' string-literal ] */
703 if (accept(TK_Delimiter)) {
704 stmt.delimiter = token();
705 if (not expect(TK_STRING_LITERAL))
706 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
707 }
708
709 /* [ 'ESCAPE' string-literal ] */
710 if (accept(TK_Escape)) {
711 stmt.escape = token();
712 if (not expect(TK_STRING_LITERAL))
713 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
714 }
715
716 /* [ 'QUOTE' string-literal ] */
717 if (accept(TK_Quote)) {
718 stmt.quote = token();
719 if (not expect(TK_STRING_LITERAL))
720 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
721 }
722
723 /* [ 'HAS' 'HEADER' ] */
724 if (accept(TK_Has)) {
725 if (not expect(TK_Header))
726 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
727 stmt.has_header = true;
728 }
729
730 /* [ 'SKIP' 'HEADER' ] */
731 if (accept(TK_Skip)) {
732 if (not expect(TK_Header))
733 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
734 stmt.skip_header = true;
735 }
736
737 return std::make_unique<DSVImportStmt>(stmt);
738 }
739
740 default:
741 diag.e(token().pos) << "Unrecognized input format \"" << token().text << "\".\n";
742 return recover<ErrorStmt>(std::move(start), follow_set_STATEMENT);
743 }
744}
745
746/*======================================================================================================================
747 * Clauses
748 *====================================================================================================================*/
749
750std::unique_ptr<Clause> Parser::parse_SelectClause()
751{
752 Token start = token();
753
754 /* 'SELECT' */
755 if (not expect(TK_Select)) {
756 consume();
757 return recover<ErrorClause>(std::move(start), follow_set_SELECT_CLAUSE);
758 }
759
760 /* ( '*' | expression [ [ 'AS' ] identifier ] ) */
761 Token select_all = Token::CreateArtificial();
762 std::vector<SelectClause::select_type> select;
763 if (token() == TK_ASTERISK) {
764 select_all = token();
765 consume();
766 } else {
767 auto e = parse_Expr();
769 if (accept(TK_As)) {
770 tok = token();
771 if (not expect(TK_IDENTIFIER))
772 return recover<ErrorClause>(std::move(start), follow_set_SELECT_CLAUSE);
773 } else if (token().type == TK_IDENTIFIER) {
774 tok = token();
775 consume();
776 }
777 select.emplace_back(std::move(e), std::move(tok));
778 }
779
780 /* { ',' expression [ [ 'AS' ] identifier ] } */
781 while (accept(TK_COMMA)) {
782 auto e = parse_Expr();
784 if (accept(TK_As)) {
785 tok = token();
786 if (not expect(TK_IDENTIFIER))
787 return recover<ErrorClause>(std::move(start), follow_set_SELECT_CLAUSE);
788 } else if (token().type == TK_IDENTIFIER) {
789 tok = token();
790 consume();
791 }
792 select.emplace_back(std::move(e), std::move(tok));
793 }
794
795 return std::make_unique<SelectClause>(std::move(start), std::move(select), std::move(select_all));
796}
797
798std::unique_ptr<Clause> Parser::parse_FromClause()
799{
800 Token start = token();
801
802 /* 'FROM' */
803 if (not expect(TK_From)) {
804 consume();
805 return recover<ErrorClause>(std::move(start), follow_set_FROM_CLAUSE);
806 }
807
808 /* table-or-select-statement { ',' table-or-select-statement } */
809 std::vector<FromClause::from_type> from;
810 do {
812 if (accept(TK_LPAR)) {
813 std::unique_ptr<Stmt> S = parse_SelectStmt();
814 if (not expect(TK_RPAR))
815 return recover<ErrorClause>(std::move(start), follow_set_FROM_CLAUSE);
816 accept(TK_As);
817 alias = token();
818 if (not expect(TK_IDENTIFIER))
819 return recover<ErrorClause>(std::move(start), follow_set_FROM_CLAUSE);
820 from.emplace_back(std::move(S), std::move(alias));
821 } else {
822 Token table = token();
823 if (not expect(TK_IDENTIFIER))
824 return recover<ErrorClause>(std::move(start), follow_set_FROM_CLAUSE);
825 if (accept(TK_As)) {
826 alias = token();
827 if (not expect(TK_IDENTIFIER))
828 return recover<ErrorClause>(std::move(start), follow_set_FROM_CLAUSE);
829 } else if (token().type == TK_IDENTIFIER) {
830 alias = token();
831 consume();
832 }
833 from.emplace_back(std::move(table), std::move(alias));
834 }
835 } while (accept(TK_COMMA));
836
837 return std::make_unique<FromClause>(std::move(start), std::move(from));
838}
839
840std::unique_ptr<Clause> Parser::parse_WhereClause()
841{
842 Token start = token();
843
844 /* 'WHERE' */
845 if (not expect(TK_Where)) {
846 consume();
847 return recover<ErrorClause>(std::move(start), follow_set_WHERE_CLAUSE);
848 }
849
850 /* expression */
851 std::unique_ptr<Expr> where = parse_Expr();
852
853 return std::make_unique<WhereClause>(std::move(start), std::move(where));
854}
855
856std::unique_ptr<Clause> Parser::parse_GroupByClause()
857{
858 Token start = token();
859
860 /* 'GROUP' 'BY' */
861 if (not expect(TK_Group)) {
862 consume();
863 return recover<ErrorClause>(std::move(start), follow_set_GROUP_BY_CLAUSE);
864 }
865 if (not expect(TK_By))
866 return recover<ErrorClause>(std::move(start), follow_set_GROUP_BY_CLAUSE);
867
868 /* expr [ 'AS' identifier ] { ',' expr [ 'AS' identifier ] } */
869 std::vector<GroupByClause::group_type> group_by;
870 do {
871 auto e = parse_Expr();
873 if (accept(TK_As)) {
874 tok = token();
875 if (not expect(TK_IDENTIFIER))
876 return recover<ErrorClause>(std::move(start), follow_set_GROUP_BY_CLAUSE);
877 } else if (token().type == TK_IDENTIFIER) {
878 tok = token();
879 consume();
880 }
881 group_by.emplace_back(std::move(e), std::move(tok));
882 } while (accept(TK_COMMA));
883
884 return std::make_unique<GroupByClause>(std::move(start), std::move(group_by));
885}
886
887std::unique_ptr<Clause> Parser::parse_HavingClause()
888{
889 Token start = token();
890
891 /* 'HAVING' */
892 if (not expect(TK_Having)) {
893 consume();
894 return recover<ErrorClause>(std::move(start), follow_set_HAVING_CLAUSE);
895 }
896
897 /* expression */
898 std::unique_ptr<Expr> having = parse_Expr();
899
900 return std::make_unique<HavingClause>(std::move(start), std::move(having));
901}
902
903std::unique_ptr<Clause> Parser::parse_OrderByClause()
904{
905 Token start = token();
906
907 /* 'ORDER' 'BY' */
908 if (not expect(TK_Order)) {
909 consume();
910 return recover<ErrorClause>(std::move(start), follow_set_ORDER_BY_CLAUSE);
911 }
912 if (not expect(TK_By))
913 return recover<ErrorClause>(std::move(start), follow_set_ORDER_BY_CLAUSE);
914
915 /* expression [ 'ASC' | 'DESC' ] { ',' expression [ 'ASC' | 'DESC' ] } */
916 std::vector<OrderByClause::order_type> order_by;
917 do {
918 auto e = parse_Expr();
919 if (accept(TK_Descending)) {
920 order_by.emplace_back(std::move(e), false);
921 } else {
922 accept(TK_Ascending);
923 order_by.emplace_back(std::move(e), true);
924 }
925 } while (accept(TK_COMMA));
926
927 return std::make_unique<OrderByClause>(std::move(start), std::move(order_by));
928}
929
930std::unique_ptr<Clause> Parser::parse_LimitClause()
931{
932 Token start = token();
933
934 /* 'LIMIT' integer-constant */
935 if (not expect(TK_Limit)) {
936 consume();
937 return recover<ErrorClause>(std::move(start), follow_set_LIMIT_CLAUSE);
938 }
939 Token limit = token();
940 if (limit.type == TK_DEC_INT or limit.type == TK_OCT_INT or limit.type == TK_HEX_INT) {
941 consume();
942 } else {
943 diag.e(limit.pos) << "expected integer limit, got " << limit.text << '\n';
944 return recover<ErrorClause>(std::move(start), follow_set_LIMIT_CLAUSE);
945 }
946
947 /* [ 'OFFSET' integer-constant ] */
949 if (accept(TK_Offset)) {
950 offset = token();
951 if (offset.type == TK_DEC_INT or offset.type == TK_OCT_INT or offset.type == TK_HEX_INT) {
952 consume();
953 } else {
954 diag.e(offset.pos) << "expected integer offset, got " << offset.text << '\n';
955 return recover<ErrorClause>(std::move(start), follow_set_LIMIT_CLAUSE);
956 }
957 }
958
959 return std::make_unique<LimitClause>(std::move(start), std::move(limit), std::move(offset));
960}
961
962/*======================================================================================================================
963 * Expressions
964 *====================================================================================================================*/
965
966std::unique_ptr<Expr> Parser::parse_Expr(const int precedence_lhs, std::unique_ptr<Expr> lhs)
967{
968 /*
969 * primary-expression ::= designator | constant | '(' expression ')' | '(' select-statement ')' ;
970 * unary-expression ::= [ '+' | '-' | '~' ] postfix-expression ;
971 * logical-not-expression ::= 'NOT' logical-not-expression | comparative-expression ;
972 */
973 switch (token().type) {
974 /* primary-expression */
975 case TK_IDENTIFIER:
976 lhs = parse_designator(); // XXX For SUM(x), 'SUM' is parsed as designator; should be identifier.
977 break;
978 case TK_Null:
979 case TK_True:
980 case TK_False:
981 case TK_STRING_LITERAL:
982 case TK_DATE:
983 case TK_DATE_TIME:
984 case TK_OCT_INT:
985 case TK_DEC_INT:
986 case TK_HEX_INT:
987 case TK_DEC_FLOAT:
988 case TK_HEX_FLOAT:
989 lhs = std::make_unique<Constant>(consume());
990 break;
991 case TK_LPAR:
992 consume();
993 if (token().type == TK_Select)
994 lhs = std::make_unique<QueryExpr>(token(), parse_SelectStmt());
995 else
996 lhs = parse_Expr();
997 if (not expect(TK_RPAR)) {
998 recover(follow_set_PRIMARY_EXPRESSION);
999 }
1000 break;
1001
1002 /* unary-expression */
1003 case TK_PLUS:
1004 case TK_MINUS:
1005 case TK_TILDE: {
1006 auto tok = consume();
1007 int p = get_precedence(TK_TILDE); // the precedence of TK_TILDE equals that of unary plus and minus
1008 lhs = std::make_unique<UnaryExpr>(std::move(tok), parse_Expr(p));
1009 break;
1010 }
1011
1012 /* logical-NOT-expression */
1013 case TK_Not: {
1014 auto tok = consume();
1015 int p = get_precedence(tok.type);
1016 lhs = std::make_unique<UnaryExpr>(std::move(tok), parse_Expr(p));
1017 break;
1018 }
1019
1020 default:
1021 diag.e(token().pos) << "expected expression, got " << token().text << '\n';
1022 recover(follow_set_EXPRESSION);
1023 return std::make_unique<ErrorExpr>(token());
1024 }
1025
1026 /* postfix-expression ::= postfix-expression '(' [ expression { ',' expression } ] ')' | primary-expression */
1027 while (token() == TK_LPAR) {
1028 Token lpar = consume();
1029 std::vector<std::unique_ptr<Expr>> args;
1030 if (token().type == TK_ASTERISK) {
1031 consume();
1032 } else if (token().type != TK_RPAR) {
1033 do
1034 args.push_back(parse_Expr());
1035 while (accept(TK_COMMA));
1036 }
1037 if (not expect(TK_RPAR)) {
1038 recover(follow_set_POSTFIX_EXPRESSION);
1039 lhs = std::make_unique<ErrorExpr>(token());
1040 continue;
1041 }
1042 lhs = std::make_unique<FnApplicationExpr>(std::move(lpar), std::move(lhs), std::move(args));
1043 }
1044
1045 for (;;) {
1046 Token op = token();
1047 int p = get_precedence(op);
1048 if (precedence_lhs > p) return lhs; // left operator has higher precedence_lhs
1049 consume();
1050
1051 auto rhs = parse_Expr(p + 1);
1052 lhs = std::make_unique<BinaryExpr>(std::move(op), std::move(lhs), std::move(rhs));
1053 }
1054}
1055
1056std::unique_ptr<Expr> Parser::parse_designator()
1057{
1058 Token lhs = token();
1059 if (not expect(TK_IDENTIFIER)) {
1060 recover(follow_set_DESIGNATOR);
1061 return std::make_unique<ErrorExpr>(std::move(lhs));
1062 }
1063 if (token() == TK_DOT) {
1064 Token dot = consume();
1065 Token rhs = token();
1066 if (not expect(TK_IDENTIFIER)) {
1067 recover(follow_set_DESIGNATOR);
1068 return std::make_unique<ErrorExpr>(std::move(rhs));
1069 }
1070 return std::make_unique<Designator>(std::move(dot), std::move(lhs), std::move(rhs)); // tbl.attr
1071 }
1072 return std::make_unique<Designator>(std::move(lhs)); // attr
1073}
1074
1075std::unique_ptr<Expr> Parser::expect_integer()
1076{
1077 if (is_integer(token().type)) {
1078 return std::make_unique<Constant>(consume());
1079 } else {
1080 diag.e(token().pos) << "expected integer constant, got " << token().text << '\n';
1081 return std::make_unique<ErrorExpr>(token());
1082 }
1083}
1084
1085/*======================================================================================================================
1086 * Types
1087 *====================================================================================================================*/
1088
1090{
1091 switch (token().type) {
1092 default:
1093 diag.e(token().pos) << "expected data-type, got " << token().text << '\n';
1094 goto error_recovery;
1095
1096 /* BOOL */
1097 case TK_Bool:
1098 consume();
1099 return Type::Get_Boolean(Type::TY_Scalar);
1100
1101 /* 'CHAR' '(' decimal-constant ')' */
1102 case TK_Char:
1103 /* 'VARCHAR' '(' decimal-constant ')' */
1104 case TK_Varchar: {
1105 bool is_varying = token().type == TK_Varchar;
1106 consume();
1107 if (not expect(TK_LPAR)) goto error_recovery;
1108 Token tok = token();
1109 if (not expect(TK_DEC_INT)) goto error_recovery;
1110 if (not expect(TK_RPAR)) goto error_recovery;
1111 errno = 0;
1112 std::size_t length = strtoul(*(tok.text), nullptr, 10);
1113 if (errno) {
1114 diag.e(tok.pos) << tok.text << " is not a valid length\n";
1115 goto error_recovery;
1116 }
1117 return is_varying ? Type::Get_Varchar(Type::TY_Scalar, length) : Type::Get_Char(Type::TY_Scalar, length);
1118 }
1119
1120 /* DATE */
1121 case TK_Date:
1122 consume();
1123 return Type::Get_Date(Type::TY_Scalar);
1124
1125 /* DATETIME */
1126 case TK_Datetime:
1127 consume();
1128 return Type::Get_Datetime(Type::TY_Scalar);
1129
1130 /* 'INT' '(' decimal-constant ')' */
1131 case TK_Int: {
1132 consume();
1133 if (not expect(TK_LPAR)) goto error_recovery;
1134 Token tok = token();
1135 if (not expect(TK_DEC_INT)) goto error_recovery;
1136 if (not expect(TK_RPAR)) goto error_recovery;
1137 errno = 0;
1138 std::size_t bytes = strtoul(*(tok.text), nullptr, 10);
1139 if (errno) {
1140 diag.e(tok.pos) << tok.text << " is not a valid size for an INT\n";
1141 goto error_recovery;
1142 }
1143 return Type::Get_Integer(Type::TY_Scalar, bytes);
1144 }
1145
1146 /* 'FLOAT' */
1147 case TK_Float:
1148 consume();
1149 return Type::Get_Float(Type::TY_Scalar);
1150
1151 /* 'DOUBLE' */
1152 case TK_Double:
1153 consume();
1154 return Type::Get_Double(Type::TY_Scalar);
1155
1156 /* 'DECIMAL' '(' decimal-constant [ ',' decimal-constant ] ')' */
1157 case TK_Decimal: {
1158 consume();
1159 if (not expect(TK_LPAR)) goto error_recovery;
1160 Token precision = token();
1162 if (not expect(TK_DEC_INT)) goto error_recovery;
1163 if (accept(TK_COMMA)) {
1164 scale = token();
1165 if (not expect(TK_DEC_INT)) goto error_recovery;
1166 }
1167 if (not expect(TK_RPAR)) goto error_recovery;
1168 errno = 0;
1169 std::size_t p = strtoul(*(precision.text), nullptr, 10);
1170 if (errno) {
1171 diag.e(precision.pos) << precision.text << " is not a valid precision for a DECIMAL\n";
1172 goto error_recovery;
1173 }
1174 errno = 0;
1175 std::size_t s = scale.text.has_value() ? strtoul(*(scale.text), nullptr, 10) : 0;
1176 if (errno) {
1177 diag.e(scale.pos) << scale.text << " is not a valid scale for a DECIMAL\n";
1178 goto error_recovery;
1179 }
1180 return Type::Get_Decimal(Type::TY_Scalar, p, s);
1181 }
1182 }
1183
1184error_recovery:
1185 recover(follow_set_DATA_TYPE);
1186 return Type::Get_Error();
1187}
struct @5 args
#define M_notnull(ARG)
Definition: macro.hpp:182
#define M_insist(...)
Definition: macro.hpp:129
‍mutable namespace
Definition: Backend.hpp:10
and
Definition: enum_ops.hpp:12
TokenType
Definition: TokenType.hpp:10
static Catalog & Get()
Return a reference to the single Catalog instance.
std::ostream & e(const Position pos)
Definition: Diagnostic.hpp:41
A data type representing a pooled (or internalized) object.
Definition: Pool.hpp:168
bool has_value() const
Definition: Pool.hpp:237
This class represents types in the SQL type system.
Definition: Type.hpp:46
static Pooled< CharacterSequence > Get_Char(category_t category, std::size_t length)
Returns a CharacterSequence type of the given category and fixed length.
Definition: Type.cpp:75
static Pooled< Numeric > Get_Double(category_t category)
Returns a Numeric type of given category for 64 bit floating-points.
Definition: Type.cpp:104
static M_LCOV_EXCL_STOP Pooled< ErrorType > Get_Error()
Returns a ErrorType.
Definition: Type.cpp:64
static Pooled< Numeric > Get_Decimal(category_t category, unsigned digits, unsigned scale)
Returns a Numeric type for decimals of given category, decimal digits, and scale.
Definition: Type.cpp:89
static Pooled< Date > Get_Date(category_t category)
Returns a Date type of the given category.
Definition: Type.cpp:85
static Pooled< Boolean > Get_Boolean(category_t category)
Returns a Boolean type of the given category.
Definition: Type.cpp:68
static Pooled< DateTime > Get_Datetime(category_t category)
Returns a DateTime type of the given category.
Definition: Type.cpp:87
static Pooled< Numeric > Get_Float(category_t category)
Returns a Numeric type of given category for 32 bit floating-points.
Definition: Type.cpp:99
static Pooled< Numeric > Get_Integer(category_t category, unsigned num_bytes)
Returns a Numeric type for integrals of given category and num_bytes bytes.
Definition: Type.cpp:94
static Pooled< CharacterSequence > Get_Varchar(category_t category, std::size_t length)
Returns a CharacterSequence type of the given category and varying length.
Definition: Type.cpp:80
An import statement for a delimiter separated values (DSV) file.
Definition: AST.hpp:1027
std::vector< element_type > tuple_t
Definition: AST.hpp:970
std::unique_ptr< Clause > parse_SelectClause()
Definition: Parser.cpp:750
const Type * parse_data_type()
Definition: Parser.cpp:1089
std::unique_ptr< Stmt > parse_SelectStmt()
Definition: Parser.cpp:513
std::unique_ptr< Stmt > parse_ImportStmt()
Definition: Parser.cpp:664
std::unique_ptr< Stmt > parse_DropIndexStmt()
Definition: Parser.cpp:480
std::array< bool, unsigned(TokenType::TokenType_MAX)+1 > follow_set_t
Definition: Parser.hpp:18
bool accept(const TokenType tt)
Definition: Parser.hpp:51
std::unique_ptr< Stmt > parse_DropDatabaseStmt()
Definition: Parser.cpp:212
bool is(const TokenType tt)
Definition: Parser.hpp:40
std::unique_ptr< Stmt > parse_CreateDatabaseStmt()
Definition: Parser.cpp:192
Token consume()
Definition: Parser.hpp:43
std::unique_ptr< Stmt > parse_DeleteStmt()
Definition: Parser.cpp:639
std::unique_ptr< Clause > parse_LimitClause()
Definition: Parser.cpp:930
std::unique_ptr< Clause > parse_FromClause()
Definition: Parser.cpp:798
std::unique_ptr< Stmt > parse_UpdateStmt()
Definition: Parser.cpp:600
void recover(const follow_set_t &FS)
Consumes tokens until the first occurence of a token in the follow set FS is found.
Definition: Parser.hpp:66
bool expect(const TokenType tt)
Definition: Parser.hpp:59
std::unique_ptr< Stmt > parse_CreateIndexStmt()
Definition: Parser.cpp:395
std::unique_ptr< Stmt > parse_UseDatabaseStmt()
Definition: Parser.cpp:241
const Token & token()
Definition: Parser.hpp:38
std::unique_ptr< Expr > expect_integer()
Definition: Parser.cpp:1075
std::unique_ptr< Stmt > parse_CreateTableStmt()
Definition: Parser.cpp:258
std::unique_ptr< Clause > parse_OrderByClause()
Definition: Parser.cpp:903
std::unique_ptr< Stmt > parse_Stmt()
Definition: Parser.cpp:133
std::unique_ptr< Command > parse()
Definition: Parser.cpp:98
std::unique_ptr< Clause > parse_GroupByClause()
Definition: Parser.cpp:856
std::unique_ptr< Clause > parse_WhereClause()
Definition: Parser.cpp:840
std::unique_ptr< Expr > parse_designator()
Definition: Parser.cpp:1056
Diagnostic & diag
Definition: Parser.hpp:22
std::unique_ptr< Expr > parse_Expr(int precedence_lhs=0, std::unique_ptr< Expr > lhs=nullptr)
Definition: Parser.cpp:966
std::unique_ptr< Stmt > parse_DropTableStmt()
Definition: Parser.cpp:362
std::unique_ptr< Clause > parse_HavingClause()
Definition: Parser.cpp:887
std::unique_ptr< Stmt > parse_InsertStmt()
Definition: Parser.cpp:545
std::unique_ptr< Instruction > parse_Instruction()
Definition: Parser.cpp:106
TokenType type
Definition: Token.hpp:17
ThreadSafePooledOptionalString text
declared as optional for dummy tokens
Definition: Token.hpp:16
static Token CreateArtificial(TokenType type=TK_EOF)
Definition: Token.hpp:29
Position pos
Definition: Token.hpp:15