/*======================================================================================================================
 * WasmUtil.cpp
 *====================================================================================================================*/

#include <optional>
#include <regex>
#include <tuple>


using namespace m;
using namespace m::storage;
using namespace m::wasm;


namespace {

namespace options {

/** Whether to apply the pointer sharing optimization when compiling data layouts. */
bool pointer_sharing = true;

/** Whether to apply the remainder removal optimization when compiling data layouts. */
bool remainder_removal = true;

}

__attribute__((constructor(201)))
static void add_wasm_util_args()
{
    Catalog &C = Catalog::Get();

    /*----- Command-line arguments -----*/
    C.arg_parser().add<bool>(
        /* group=       */ "Wasm",
        /* short=       */ nullptr,
        /* long=        */ "--no-pointer-sharing",
        /* description= */ "do not use pointer sharing optimization for data layout compilation",
        /* callback=    */ [](bool){ options::pointer_sharing = false; }
    );
    C.arg_parser().add<bool>(
        /* group=       */ "Wasm",
        /* short=       */ nullptr,
        /* long=        */ "--no-remainder-removal",
        /* description= */ "do not use remainder removal optimization for data layout compilation",
        /* callback=    */ [](bool){ options::remainder_removal = false; }
    );
}
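
/* Usage sketch (hedged): both optimizations are on by default and can be disabled from the command line, e.g.
 * `shell --no-pointer-sharing --no-remainder-removal <DB>`. The binary name and remaining arguments are assumptions;
 * only the two flags themselves are registered above. */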

}


/*======================================================================================================================
 * Helper functions
 *====================================================================================================================*/

/** Converts the value of \p operand to the arithmetic type \p T, in place. */
template<arithmetic T>
void convert_in_place(SQL_t &operand)
{
    std::visit(overloaded {
        [&operand](auto &&actual) -> void requires requires { { actual.template to<T>() } -> sql_type; } {
            auto v = actual.template to<T>();
            operand.~SQL_t();
            new (&operand) SQL_t(v);
        },
        [](auto &actual) -> void requires (not requires { { actual.template to<T>() } -> sql_type; }) {
            M_unreachable("illegal conversion");
        },
        [](std::monostate) -> void { M_unreachable("invalid variant"); },
    }, operand);
}
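
/* Note that the conversion happens *in place*: the converted value is materialized first, then the old variant is
 * destroyed and the new alternative is placement-new'ed into the same storage. A minimal sketch of the effect
 * (hypothetical values, assuming both lane types are alternatives of `SQL_t`):
 *
 *     SQL_t operand(_I32x1(42));
 *     convert_in_place<int64_t>(operand);  // operand now holds an _I64x1
 */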

/** Converts the value of \p operand to the `Numeric` type \p to_type, in place. */
void convert_in_place(SQL_t &operand, const Numeric *to_type)
{
    switch (to_type->kind) {
        case Numeric::N_Decimal:
            M_unreachable("currently not supported");

        case Numeric::N_Int:
            switch (to_type->size()) {
                default:
                    M_unreachable("invalid integer size");
                case 8:
                    convert_in_place<int8_t>(operand);
                    return;
                case 16:
                    convert_in_place<int16_t>(operand);
                    return;
                case 32:
                    convert_in_place<int32_t>(operand);
                    return;
                case 64:
                    convert_in_place<int64_t>(operand);
                    return;
            }
            break;

        case Numeric::N_Float:
            if (to_type->size() <= 32)
                convert_in_place<float>(operand);
            else
                convert_in_place<double>(operand);
            break;
    }
}
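
/* This overload only dispatches on the runtime `Numeric` type to the statically typed template above, e.g. a 32-bit
 * integer dispatches to `convert_in_place<int32_t>()` and a single-precision float to `convert_in_place<float>()`;
 * decimals are currently rejected. */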

template<bool CanBeNull, std::size_t L>
std::conditional_t<CanBeNull, _Bool<L>, Bool<L>> compile_cnf(ExprCompiler &C, const cnf::CNF &cnf)
{
    using result_t = std::conditional_t<CanBeNull, _Bool<L>, Bool<L>>;

    if (cnf.empty())
        return result_t(true);

    std::optional<result_t> wasm_cnf, wasm_clause;
    for (auto &clause : cnf) {
        wasm_clause.reset();
        for (auto &pred : clause) {
            /* Generate code for the literal of the predicate. */
            M_insist(pred.expr().type()->is_boolean());
            auto compiled = M_CONSTEXPR_COND(CanBeNull, C.compile<_Bool<L>>(pred.expr()),
                                             C.compile<_Bool<L>>(pred.expr()).insist_not_null());
            auto wasm_pred = pred.negative() ? not compiled : compiled;

            /* Add the predicate to the clause with an `or`. */
            if (wasm_clause)
                wasm_clause.emplace(*wasm_clause or wasm_pred);
            else
                wasm_clause.emplace(wasm_pred);
        }
        M_insist(bool(wasm_clause), "empty clause?");

        /* Add the clause to the CNF with an `and`. */
        if (wasm_cnf)
            wasm_cnf.emplace(*wasm_cnf and *wasm_clause);
        else
            wasm_cnf.emplace(*wasm_clause);
    }
    M_insist(bool(wasm_cnf), "empty CNF?");

    return *wasm_cnf;
}
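
/* A worked example of the folding above: for the CNF `(x OR y) AND (NOT z)`, the inner loop combines the literals of
 * a clause with `or`, here yielding `x or y`, and the outer loop then combines the clauses with `and`, yielding
 * `(x or y) and (not z)`. An empty CNF compiles to the constant `true`, i.e. an empty conjunction is vacuously
 * satisfied. */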


/*======================================================================================================================
 * ExprCompiler
 *====================================================================================================================*/

void ExprCompiler::operator()(const ast::ErrorExpr&) { M_unreachable("no errors at this stage"); }

void ExprCompiler::operator()(const ast::Designator &e)
{
    if (e.type()->is_none()) { // create NULL
        switch (CodeGenContext::Get().num_simd_lanes()) {
            default: M_unreachable("invalid number of SIMD lanes");
            case  1: set(_I32x1::Null()); break;
            case  2: set(_I32x2::Null()); break;
            case  4: set(_I32x4::Null()); break;
            case  8: set(_I32x8::Null()); break;
            case 16: set(_I32x16::Null()); break;
            case 32: set(_I32x32::Null()); break;
        }
        return;
    }

    /* Search with fully qualified name. */
    Schema::Identifier id(e.table_name.text, e.attr_name.text.assert_not_none());
    set(env_.get(id));
}

void ExprCompiler::operator()(const ast::Constant &e)
{
    if (e.type()->is_none()) { // create NULL
        switch (CodeGenContext::Get().num_simd_lanes()) {
            default: M_unreachable("invalid number of SIMD lanes");
            case  1: set(_I32x1::Null()); break;
            case  2: set(_I32x2::Null()); break;
            case  4: set(_I32x4::Null()); break;
            case  8: set(_I32x8::Null()); break;
            case 16: set(_I32x16::Null()); break;
            case 32: set(_I32x32::Null()); break;
        }
        return;
    }

    /* Interpret constant. */
    auto value = Interpreter::eval(e);

    auto set_constant = [this, &e, &value]<std::size_t L>(){
        auto set_helper = overloaded {
            [this]<sql_type T>(T &&actual) { this->set(std::forward<T>(actual)); },
            [](auto&&) { M_unreachable("not a SQL type"); }
        };

        visit(overloaded {
            [&value, &set_helper](const Boolean&) { set_helper(_Bool<L>(value.as_b())); },
            [&value, &set_helper](const Numeric &n) {
                switch (n.kind) {
                    case Numeric::N_Int:
                    case Numeric::N_Decimal:
                        switch (n.size()) {
                            default:
                                M_unreachable("invalid integer size");
                            case 8:
                                set_helper(_I8<L>(value.as_i()));
                                break;
                            case 16:
                                set_helper(_I16<L>(value.as_i()));
                                break;
                            case 32:
                                set_helper(_I32<L>(value.as_i()));
                                break;
                            case 64:
                                set_helper(_I64<L>(value.as_i()));
                                break;
                        }
                        break;
                    case Numeric::N_Float:
                        if (n.size() <= 32)
                            set_helper(_Float<L>(value.as_f()));
                        else
                            set_helper(_Double<L>(value.as_d()));
                }
            },
            [this, &value](const CharacterSequence&) {
                M_insist(L == 1, "string SIMDfication currently not supported");
                set(CodeGenContext::Get().get_literal_address(value.as<const char*>()));
            },
            [&value, &set_helper](const Date&) { set_helper(_I32<L>(value.as_i())); },
            [&value, &set_helper](const DateTime&) { set_helper(_I64<L>(value.as_i())); },
            [](const NoneType&) { M_unreachable("should've been handled earlier"); },
            [](auto&&) { M_unreachable("invalid type for given number of SIMD lanes"); },
        }, *e.type());
    };
    switch (CodeGenContext::Get().num_simd_lanes()) {
        default: M_unreachable("invalid number of SIMD lanes");
        case  1: set_constant.operator()<1>(); break;
        case  2: set_constant.operator()<2>(); break;
        case  4: set_constant.operator()<4>(); break;
        case  8: set_constant.operator()<8>(); break;
        case 16: set_constant.operator()<16>(); break;
        case 32: set_constant.operator()<32>(); break;
    }
}

void ExprCompiler::operator()(const ast::UnaryExpr &e)
{
    /* This is a helper to apply unary operations to `Expr<T>`s. It uses SFINAE within `overloaded` to only apply the
     * operation if it is well typed, e.g. `+42` is ok whereas `+true` is not. */
    auto apply_unop = [this, &e](auto unop) {
        (*this)(*e.expr);
        std::visit(overloaded {
            [](std::monostate&&) -> void { M_unreachable("illegal value"); },
            [this, &unop](auto &&expr) -> void requires requires { { unop(expr) } -> sql_type; } {
                this->set(unop(expr));
            },
            [](auto &&expr) -> void requires (not requires { { unop(expr) } -> sql_type; }) {
                M_unreachable("illegal operation");
            },
        }, get());
    };

#define UNOP(OP) apply_unop(overloaded { \
    [](auto &&expr) -> decltype(expr.operator OP()) { return expr.operator OP(); }, \
}); \
break

    switch (e.op().type) {
        default:
            M_unreachable("invalid operator");

        case TK_PLUS:  UNOP(+);
        case TK_MINUS: UNOP(-);
        case TK_TILDE: UNOP(~);
        case TK_Not:   UNOP(not);
    }
#undef UNOP
}
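
/* To illustrate the macro above: `UNOP(-)` expands to a call of `apply_unop` with a generic lambda forwarding to
 * `expr.operator-()`. The `decltype` in the trailing return type removes the lambda from overload resolution for
 * operand types that lack the operator, so the SFINAE check in `apply_unop` selects the `M_unreachable` fallback
 * instead of failing to compile. */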

void ExprCompiler::operator()(const ast::BinaryExpr &e)
{
    /* This is a helper to apply binary operations to `Expr<T>`s. It uses SFINAE within `overloaded` to only apply the
     * operation if it is well typed, e.g. `42 + 13` is ok whereas `true + 42` is not. */
    auto apply_binop = [this, &e](auto binop) {
        (*this)(*e.lhs);
        SQL_t lhs = get();

        (*this)(*e.rhs);
        SQL_t rhs = get();

        if (e.common_operand_type) {
            convert_in_place(lhs, e.common_operand_type); // convert in-place
            convert_in_place(rhs, e.common_operand_type); // convert in-place
        }

        std::visit(overloaded {
            [](std::monostate&&) -> void { M_unreachable("illegal value"); },
            [this, &binop, &rhs](auto &&expr_lhs) -> void {
                std::visit(overloaded {
                    [](std::monostate&&) -> void { M_unreachable("illegal value"); },
                    [this, expr_lhs, &binop](auto &&expr_rhs) mutable -> void
                    requires requires { { binop(expr_lhs, expr_rhs) } -> sql_type; } {
                        this->set(binop(expr_lhs, expr_rhs));
                    },
                    [](auto &&expr_rhs) -> void
                    requires (not requires { { binop(expr_lhs, expr_rhs) } -> sql_type; }) {
                        M_unreachable("illegal operation");
                    },
                }, rhs);
            },
        }, lhs);
    };

#define BINOP(OP) apply_binop( \
    [](auto lhs, auto rhs) -> decltype(lhs.operator OP(rhs)) { return lhs.operator OP(rhs); } \
); break
#define CMPOP(OP, STRCMP_OP) { \
    if (e.lhs->type()->is_character_sequence()) { \
        M_insist(e.rhs->type()->is_character_sequence()); \
        M_insist(CodeGenContext::Get().num_simd_lanes() == 1, "invalid number of SIMD lanes"); \
        apply_binop( \
            [](NChar lhs, NChar rhs) -> _Boolx1 { \
                return strcmp(lhs, rhs, STRCMP_OP); \
            } \
        ); break; \
    } else { \
        BINOP(OP); \
    } \
}

    switch (e.op().type) {
        default:
            M_unreachable("illegal token type");

        /*----- Arithmetic operations --------------------------------------------------------------------------------*/
        case TK_PLUS:     BINOP(+);
        case TK_MINUS:    BINOP(-);
        case TK_ASTERISK: BINOP(*);
        case TK_SLASH:    BINOP(/);
        case TK_PERCENT:  BINOP(%);

        /*----- Comparison operations --------------------------------------------------------------------------------*/
        case TK_EQUAL:         CMPOP(==, EQ);
        case TK_BANG_EQUAL:    CMPOP(!=, NE);
        case TK_LESS:          CMPOP(<,  LT);
        case TK_LESS_EQUAL:    CMPOP(<=, LE);
        case TK_GREATER:       CMPOP(>,  GT);
        case TK_GREATER_EQUAL: CMPOP(>=, GE);

        /*----- CharacterSequence operations -------------------------------------------------------------------------*/
        case TK_Like: {
            M_insist(e.lhs->type()->is_character_sequence());
            M_insist(e.rhs->type()->is_character_sequence());
            M_insist(CodeGenContext::Get().num_simd_lanes() == 1, "invalid number of SIMD lanes");
            (*this)(*e.lhs);
            NChar str = get<NChar>();
            if (auto static_pattern = cast<ast::Constant>(e.rhs.get())) { // check whether specialization is applicable
                auto pattern = Catalog::Get().pool(
                    interpret(*static_pattern->tok.text.assert_not_none()) // interpret pattern to handle escaped chars
                );
                if (std::regex_match(*pattern, std::regex("%[^_%\\\\]+%"))) { // contains expression
                    set(like_contains(str, pattern));
                    break;
                }
                if (std::regex_match(*pattern, std::regex("[^_%\\\\]+%"))) { // prefix expression
                    set(like_prefix(str, pattern));
                    break;
                }
                if (std::regex_match(*pattern, std::regex("%[^_%\\\\]+"))) { // suffix expression
                    set(like_suffix(str, pattern));
                    break;
                }
            }
            /* no specialization applicable, fallback to general dynamic programming approach */
            (*this)(*e.rhs);
            NChar pattern = get<NChar>();
            set(like(str, pattern));
            break;
        }
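
        /* The specializations above match constant patterns of the shapes `%abc%` (contains), `abc%` (prefix), and
         * `%abc` (suffix), where `abc` contains no wildcard or escape characters; e.g. `x LIKE '%foo%'` compiles to
         * `like_contains` instead of the general dynamic-programming `like` algorithm. */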

        case TK_DOTDOT: {
            M_insist(e.lhs->type()->is_character_sequence());
            M_insist(e.rhs->type()->is_character_sequence());
            M_insist(CodeGenContext::Get().num_simd_lanes() == 1, "invalid number of SIMD lanes");
            (*this)(*e.lhs);
            NChar lhs = get<NChar>();
            (*this)(*e.rhs);
            NChar rhs = get<NChar>();

            M_insist(e.lhs->can_be_null() == lhs.can_be_null());
            M_insist(e.rhs->can_be_null() == rhs.can_be_null());

            Var<Ptr<Charx1>> res; // always set here
            bool res_can_be_null = lhs.can_be_null() or rhs.can_be_null();
            std::size_t res_length = lhs.length() + rhs.length() + 1; // allocate space for terminating NUL byte

            if (res_can_be_null) {
                auto [_ptr_lhs, is_nullptr_lhs] = lhs.split();
                auto [_ptr_rhs, is_nullptr_rhs] = rhs.split();
                Ptr<Charx1> ptr_lhs(_ptr_lhs), ptr_rhs(_ptr_rhs); // since structured bindings cannot be used in lambda capture

                IF (is_nullptr_lhs or is_nullptr_rhs) {
                    res = Ptr<Charx1>::Nullptr(); // result is NULL if either operand is NULL
                } ELSE {
                    res = Module::Allocator().pre_malloc<char>(res_length); // create pre-allocation for result
                    Var<Ptr<Charx1>> ptr(strncpy(res, ptr_lhs, U32x1(lhs.length()))); // since res must not be changed
                    strncpy(ptr, ptr_rhs, U32x1(rhs.size_in_bytes())).discard(); // copy with possible terminating NUL byte
                    if (not rhs.guarantees_terminating_nul())
                        *ptr = '\0'; // terminate with NUL byte
                };
            } else {
                res = Module::Allocator().pre_malloc<char>(res_length); // create pre-allocation for result
                Var<Ptr<Charx1>> ptr(strncpy(res, lhs, U32x1(lhs.length()))); // since res must not be changed
                strncpy(ptr, rhs, U32x1(rhs.size_in_bytes())).discard(); // copy with possible terminating NUL byte
                if (not rhs.guarantees_terminating_nul())
                    *ptr = '\0'; // terminate with NUL byte
            }

            set(SQL_t(NChar(res, res_can_be_null, res_length, /* guarantees_terminating_nul= */ true)));
            break;
        }
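
        /* Example of the buffer sizing above: concatenating a CHAR(3) with a CHAR(4) pre-allocates 3 + 4 + 1 = 8
         * bytes, the extra byte holding the terminating NUL that is written explicitly whenever the right-hand side
         * does not already guarantee one. */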

        /*----- Logical operations -----------------------------------------------------------------------------------*/
        case TK_And:
        case TK_Or: {
            M_insist(e.lhs->type()->is_boolean());
            M_insist(e.rhs->type()->is_boolean());

            (*this)(*e.lhs);
            _Boolx1 lhs = get<_Boolx1>();
            (*this)(*e.rhs);
            _Boolx1 rhs = get<_Boolx1>();

            if (e.op().type == TK_And)
                set(lhs and rhs);
            else
                set(lhs or rhs);

            break;
        }
    }
#undef CMPOP
#undef BINOP
}

void ExprCompiler::operator()(const ast::FnApplicationExpr &e)
{
    switch (e.get_function().fnid) {
        default:
            M_unreachable("function kind not implemented");

        case m::Function::FN_UDF:
            M_unreachable("UDFs not yet supported");

        /*----- NULL check -------------------------------------------------------------------------------------------*/
        case m::Function::FN_ISNULL: {
            (*this)(*e.args[0]);
            auto arg = get();
            std::visit(overloaded { // do not use constraint `is_sql_type` since `is_null()` returns a `PrimitiveExpr`
                [this]<sql_type T>(T actual) -> void requires requires { SQL_t(actual.is_null()); } {
                    set(actual.is_null());
                },
                []<sql_type T>(T actual) -> void requires (not requires { SQL_t(actual.is_null()); }) {
                    M_unreachable("NULL check not supported");
                },
                [](std::monostate) -> void { M_unreachable("invalid variant"); },
            }, arg);
            break;
        }

        /*----- Type cast --------------------------------------------------------------------------------------------*/
        case m::Function::FN_INT: {
            (*this)(*e.args[0]);
            auto arg = get();
            convert_in_place<int32_t>(arg);
            set(std::move(arg));
            break;
        }

        /*----- Aggregate functions ----------------------------------------------------------------------------------*/
        case m::Function::FN_COUNT:
        case m::Function::FN_MIN:
        case m::Function::FN_MAX:
        case m::Function::FN_SUM:
        case m::Function::FN_AVG: {
            std::ostringstream oss;
            oss << e;
            Schema::Identifier id(Catalog::Get().pool(oss.str().c_str()));
            set(env_.get(id));
        }
    }
}
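
/* Aggregates are not computed here: the aggregation operator has already bound their results in the environment under
 * the textual form of the expression, so e.g. `MIN(x)` is looked up via the pooled identifier "MIN(x)" produced by
 * streaming the expression into `oss` above. */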

void ExprCompiler::operator()(const ast::QueryExpr &e)
{
    /* Search with fully qualified name. */
    Schema::Identifier id(e.alias(), Catalog::Get().pool("$res"));
    set(env_.get(id));
}

SQL_boolean_t ExprCompiler::compile(const cnf::CNF &cnf)
{
    switch (CodeGenContext::Get().num_simd_lanes()) {
        default: M_unreachable("invalid number of SIMD lanes");
        case  1: return cnf.can_be_null() ? compile_cnf<true,  1>(*this, cnf) : compile_cnf<false,  1>(*this, cnf);
        case 16: return cnf.can_be_null() ? compile_cnf<true, 16>(*this, cnf) : compile_cnf<false, 16>(*this, cnf);
        case 32: return cnf.can_be_null() ? compile_cnf<true, 32>(*this, cnf) : compile_cnf<false, 32>(*this, cnf);
    }
}


/*======================================================================================================================
 * Environment
 *====================================================================================================================*/

M_LCOV_EXCL_START
void Environment::dump(std::ostream &out) const
{
    out << "WasmEnvironment\n` entries: { ";
    for (auto it = exprs_.begin(), end = exprs_.end(); it != end; ++it) {
        if (it != exprs_.begin()) out << ", ";
        out << it->first;
    }
    out << " }" << std::endl;

    out << "WasmEnvironment\n` address entries: { ";
    for (auto it = expr_addrs_.begin(), end = expr_addrs_.end(); it != end; ++it) {
        if (it != expr_addrs_.begin()) out << ", ";
        out << it->first;
    }
    out << " }" << std::endl;
}
void Environment::dump() const { dump(std::cerr); }
M_LCOV_EXCL_STOP


/*======================================================================================================================
 * CodeGenContext
 *====================================================================================================================*/

thread_local std::unique_ptr<CodeGenContext> CodeGenContext::the_context_;


/*======================================================================================================================
 * compile data layout
 *====================================================================================================================*/

namespace m {

namespace wasm {

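/** Compiles the sequential access of \tparam L SIMD lanes' worth of tuples in \p layout, either storing values from
 * or loading values into the current environment (selected by \tparam IsStore). Returns three blocks: `inits` to
 * initialize pointers and masks, the per-tuple `stores` resp. `loads`, and the `jumps` that advance to the next tuple
 * pack. \tparam PointerSharing enables sharing one pointer among leaves with equal bit offset and stride. */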
template<bool IsStore, std::size_t L, bool SinglePass, bool PointerSharing, VariableKind Kind>
requires (L > 0) and (is_pow_2(L))
std::tuple<Block, Block, Block>
compile_data_layout_sequential(const Schema &_tuple_value_schema, const Schema &_tuple_addr_schema,
                               Ptr<void> base_address, const storage::DataLayout &layout, const Schema &layout_schema,
                               Variable<uint32_t, Kind, false> &tuple_id)
{
    const auto tuple_value_schema = _tuple_value_schema.deduplicate().drop_constants();
    const auto tuple_addr_schema = _tuple_addr_schema.deduplicate().drop_constants();

    M_insist(tuple_value_schema.num_entries() != 0, "sequential access must access at least one tuple schema entry");
    M_insist(not IsStore or tuple_addr_schema.num_entries() == 0, "addresses are only computed for loads");
#ifndef NDEBUG
    for (auto &e : tuple_value_schema)
        M_insist(layout_schema.find(e.id) != layout_schema.cend(), "tuple value schema entry not found");
    for (auto &e : tuple_addr_schema) {
        auto it = layout_schema.find(e.id);
        M_insist(it != layout_schema.cend(), "tuple address schema entry not found");
        M_insist(not it->nullable(), "nullable tuple address schema entry not yet supported");
        M_insist(not it->type->is_boolean(), "boolean tuple address schema entry not yet supported");
        M_insist(not it->type->is_character_sequence(), "character sequence tuple address schema entry omitted");
    }
#endif

    Block inits("inits", false), stores("stores", false), loads("loads", false), jumps("jumps", false);
    SQL_t values[tuple_value_schema.num_entries()];
    SQL_addr_t *addrs;
    if (not tuple_addr_schema.empty())
        addrs = static_cast<SQL_addr_t*>(alloca(sizeof(SQL_addr_t) * tuple_addr_schema.num_entries()));
    Bool<L> *null_bits;
    if constexpr (not IsStore)
        null_bits = static_cast<Bool<L>*>(alloca(sizeof(Bool<L>) * tuple_value_schema.num_entries()));

    using key_t = std::pair<uint8_t, uint64_t>;
    using ptr_t = std::conditional_t<SinglePass, Var<Ptr<void>>, Global<Ptr<void>>>;
    using mask_t = std::conditional_t<SinglePass, Var<U32x1>, Global<U32x1>>;
    struct value_t
    {
        ptr_t ptr;
        std::optional<mask_t> mask;
    };
    std::conditional_t<
        PointerSharing, std::unordered_map<key_t, value_t>, std::vector<std::pair<key_t, value_t>>
    > loading_context;
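
    /* Pointer sharing in a nutshell: leaves are keyed by (bit offset mod 8, stride in bits). Leaves with equal key
     * advance in lockstep, so with `PointerSharing` enabled they share a single pointer (and mask) in
     * `loading_context` instead of each maintaining their own; with it disabled, every leaf gets its own entry in the
     * vector. */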

    auto &env = CodeGenContext::Get().env(); // the current codegen environment

    if constexpr (L > 1) {
        BLOCK_OPEN(inits) {
            Wasm_insist(tuple_id % uint32_t(L) == 0U, "must start at a tuple ID beginning a SIMD batch");
        }
    }

    /*----- Check whether any of the entries in `tuple_value_schema` can be NULL, so that we need the NULL bitmap. -----*/
    const bool needs_null_bitmap = [&]() {
        for (auto &tuple_entry : tuple_value_schema) {
            if (layout_schema[tuple_entry.id].second.nullable())
                return true; // found an entry in `tuple_value_schema` that can be NULL according to `layout_schema`
        }
        return false; // no attribute in `tuple_value_schema` can be NULL according to `layout_schema`
    }();
    bool has_null_bitmap = false; // indicates whether the data layout specifies a NULL bitmap

    /*----- If predication is used, introduce predication variable and update it before storing a tuple. -----*/
    const bool is_predicated = env.predicated();
    M_insist(not is_predicated or (IsStore and L == 1), "predication only supported for storing scalar tuples");
    std::optional<Var<Boolx1>> pred;
    if (is_predicated) {
        BLOCK_OPEN(stores) {
            pred = env.extract_predicate<_Boolx1>().is_true_and_not_null();
        }
    }

    /*----- Increment tuple ID before advancing to the next tuple pack. -----*/
    if constexpr (IsStore) {
        BLOCK_OPEN(jumps) {
            if (is_predicated) {
                M_insist(L == 1);
                M_insist(bool(pred));
                tuple_id += pred->to<uint32_t>();
            } else {
                tuple_id += uint32_t(L);
            }
        }
    } else {
        BLOCK_OPEN(jumps) {
            tuple_id += uint32_t(L);
        }
    }

    /*----- Visit the data layout. -----*/
    layout.for_sibling_leaves(
        [&, &inits=inits, &jumps=jumps, &stores=stores, &loads=loads] // explicitly capture references non-const
        (const std::vector<DataLayout::leaf_info_t> &leaves, const DataLayout::level_info_stack_t &levels,
         uint64_t inode_offset_in_bits)
    {
        /*----- Clear the per-leaf data structure. -----*/
        loading_context.clear();

        /*----- Remember whether and where we found the NULL bitmap. -----*/
        std::optional<ptr_t> null_bitmap_ptr;
        std::optional<mask_t> null_bitmap_mask;
        uint8_t null_bitmap_bit_offset;
        uint64_t null_bitmap_stride_in_bits;

        /*----- Compute INode offset in bytes and INode iteration depending on the given tuple ID. -----*/
        auto compute_additional_inode_byte_offset = [&](U32x1 tuple_id) -> U64x1 {
            auto rec = [&](U32x1 curr_tuple_id, decltype(levels.cbegin()) curr, const decltype(levels.cend()) end,
                           auto rec) -> U64x1
            {
                if (curr == end) {
                    Wasm_insist(curr_tuple_id == tuple_id % uint32_t(levels.back().num_tuples));
                    return U64x1(0);
                }

                if (is_pow_2(curr->num_tuples)) {
                    U32x1 child_iter = curr_tuple_id.clone() >> uint32_t(__builtin_ctzl(curr->num_tuples));
                    U32x1 inner_tuple_id = curr_tuple_id bitand uint32_t(curr->num_tuples - 1U);
                    M_insist(curr->stride_in_bits % 8 == 0, "INode stride must be byte aligned");
                    U64x1 offset_in_bytes = child_iter * uint64_t(curr->stride_in_bits / 8);
                    return offset_in_bytes + rec(inner_tuple_id, std::next(curr), end, rec);
                } else {
                    U32x1 child_iter = curr_tuple_id.clone() / uint32_t(curr->num_tuples);
                    U32x1 inner_tuple_id = curr_tuple_id % uint32_t(curr->num_tuples);
                    M_insist(curr->stride_in_bits % 8 == 0, "INode stride must be byte aligned");
                    U64x1 offset_in_bytes = child_iter * uint64_t(curr->stride_in_bits / 8);
                    return offset_in_bytes + rec(inner_tuple_id, std::next(curr), end, rec);
                }
            };
            return rec(tuple_id.clone(), levels.cbegin(), levels.cend(), rec);
        };
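
        /* Offset arithmetic by example: for a level holding 1024 tuples per child INode (a power of two), the child
         * iteration is `tuple_id >> 10` and the tuple ID within the child is `tuple_id & 1023`; for non-power-of-two
         * counts the code falls back to division and modulo. Each child contributes `stride_in_bits / 8` bytes. */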
        std::optional<const Var<I32x1>> inode_byte_offset;
        std::optional<const Var<U32x1>> inode_iter;
        BLOCK_OPEN(inits) {
            M_insist(inode_offset_in_bits % 8 == 0, "INode offset must be byte aligned");
            inode_byte_offset.emplace(
                int32_t(inode_offset_in_bits / 8)
                + compute_additional_inode_byte_offset(tuple_id).make_signed().template to<int32_t>()
            );
            M_insist(levels.back().num_tuples != 0, "INode must be large enough for at least one tuple");
            if (levels.back().num_tuples != 1) {
                inode_iter.emplace(
                    is_pow_2(levels.back().num_tuples) ? tuple_id bitand uint32_t(levels.back().num_tuples - 1U)
                                                       : tuple_id % uint32_t(levels.back().num_tuples)
                );
            } else {
                /* omit computation of INode iteration since it is always the first iteration, i.e. equals 0 */
            }
        };

        /*----- Iterate over sibling leaves, i.e. leaf children of a common parent INode, to emit code. -----*/
        for (auto &leaf_info : leaves) {
            const uint8_t bit_stride = leaf_info.stride_in_bits % 8; // need byte stride later for the stride jumps

            if (leaf_info.leaf.index() == layout_schema.num_entries()) { // NULL bitmap
                if (not needs_null_bitmap)
                    continue;

                M_insist(not has_null_bitmap, "at most one bitmap may be specified");
                has_null_bitmap = true;
                if (bit_stride) { // NULL bitmap with bit stride requires dynamic masking
                    M_insist(L == 1, "SIMDfied loading of NULL bitmap with bit stride currently not supported");

                    M_insist(bool(inode_iter), "stride requires repetition");
                    U64x1 leaf_offset_in_bits = leaf_info.offset_in_bits + *inode_iter * leaf_info.stride_in_bits;
                    U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
                    I32x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed().to<int32_t>(); // div 8

                    null_bitmap_bit_offset = leaf_info.offset_in_bits % 8;
                    null_bitmap_stride_in_bits = leaf_info.stride_in_bits;
                    BLOCK_OPEN(inits) {
                        /*----- Initialize pointer and mask. -----*/
                        null_bitmap_ptr.emplace(); // default-construct for globals to be able to use assignment below
                        *null_bitmap_ptr = base_address.clone() + *inode_byte_offset + leaf_byte_offset;
                        null_bitmap_mask.emplace(); // default-construct for globals to be able to use assignment below
                        *null_bitmap_mask = 1U << leaf_bit_offset;
                    }

                    /*----- Iterate over layout entries in *ascending* order. -----*/
                    std::size_t prev_layout_idx = 0;
                    for (std::size_t layout_idx = 0; layout_idx < layout_schema.num_entries(); ++layout_idx) {
                        auto &layout_entry = layout_schema[layout_idx];
                        if (layout_entry.nullable()) { // layout entry may be NULL
                            auto tuple_it = tuple_value_schema.find(layout_entry.id);
                            if (tuple_it == tuple_value_schema.end())
                                continue; // entry not contained in tuple schema
                            M_insist(prev_layout_idx == 0 or layout_idx > prev_layout_idx,
                                     "layout entries not processed in ascending order");
                            M_insist(*tuple_it->type == *layout_entry.type);
                            const auto delta = layout_idx - prev_layout_idx;
                            const uint8_t bit_delta = delta % 8;
                            const int32_t byte_delta = delta / 8;

                            auto advance_to_next_bit = [&]() {
                                if (bit_delta) {
                                    if (is_predicated) {
                                        M_insist(bool(pred));
                                        *null_bitmap_mask <<=
                                            Select(*pred, bit_delta, uint8_t(0)); // possibly advance mask
                                    } else {
                                        *null_bitmap_mask <<= bit_delta; // advance mask
                                    }
                                    /* If the mask surpasses the first byte, advance pointer to the next byte... */
                                    *null_bitmap_ptr += (*null_bitmap_mask bitand 0xffU).eqz().template to<int32_t>();
                                    /* ... and remove lowest byte from the mask. */
                                    *null_bitmap_mask = Select((*null_bitmap_mask bitand 0xffU).eqz(),
                                                               *null_bitmap_mask >> 8U, *null_bitmap_mask);
                                }
                                if (byte_delta) {
                                    if (is_predicated) {
                                        M_insist(bool(pred));
                                        *null_bitmap_ptr +=
                                            Select(*pred, byte_delta, 0); // possibly advance pointer
                                    } else {
                                        *null_bitmap_ptr += byte_delta; // advance pointer
                                    }
                                }
                            };
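
                            /* Delta arithmetic by example: if the previous nullable entry was at layout index 3 and
                             * the current one is at index 13, then delta = 10, i.e. bit_delta = 2 and byte_delta = 1:
                             * the mask shifts by two bits (carrying into the next byte if it leaves the low byte) and
                             * the pointer additionally advances one whole byte. */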

                            if constexpr (IsStore) {
                                /*----- Store NULL bit depending on its type. -----*/
                                auto store = [&]<typename T>() {
                                    BLOCK_OPEN(stores) {
                                        advance_to_next_bit();

                                        auto [value, is_null] = env.get<T>(tuple_it->id).split(); // get value
                                        value.discard(); // handled at entry leaf
                                        setbit(null_bitmap_ptr->template to<uint8_t*>(), is_null,
                                               null_bitmap_mask->template to<uint8_t>()); // update bit
                                    }
                                };
                                visit(overloaded {
                                    [&](const Boolean&) { store.template operator()<_Boolx1>(); },
                                    [&](const Numeric &n) {
                                        switch (n.kind) {
                                            case Numeric::N_Int:
                                            case Numeric::N_Decimal:
                                                switch (n.size()) {
                                                    default: M_unreachable("invalid size");
                                                    case 8:  store.template operator()<_I8x1 >(); break;
                                                    case 16: store.template operator()<_I16x1>(); break;
                                                    case 32: store.template operator()<_I32x1>(); break;
                                                    case 64: store.template operator()<_I64x1>(); break;
                                                }
                                                break;
                                            case Numeric::N_Float:
                                                if (n.size() <= 32)
                                                    store.template operator()<_Floatx1>();
                                                else
                                                    store.template operator()<_Doublex1>();
                                        }
                                    },
                                    [&](const CharacterSequence&) {
                                        BLOCK_OPEN(stores) {
                                            advance_to_next_bit();

                                            auto value = env.get<NChar>(tuple_it->id); // get value
                                            setbit(null_bitmap_ptr->template to<uint8_t*>(), value.is_null(),
                                                   null_bitmap_mask->template to<uint8_t>()); // update bit
                                        }
                                    },
                                    [&](const Date&) { store.template operator()<_I32x1>(); },
                                    [&](const DateTime&) { store.template operator()<_I64x1>(); },
                                    [](auto&&) { M_unreachable("invalid type"); },
                                }, *tuple_it->type);
                            } else {
                                const auto tuple_idx = std::distance(tuple_value_schema.begin(), tuple_it);
                                BLOCK_OPEN(loads) {
                                    advance_to_next_bit();

                                    U8x1 byte = *null_bitmap_ptr->template to<uint8_t*>(); // load the byte
                                    Var<Boolx1> value(
                                        (byte bitand *null_bitmap_mask).template to<bool>()
                                    ); // mask bit with dynamic mask
                                    new (&null_bits[tuple_idx]) Boolx1(value);
                                    /* Address for NULL bits not yet supported. */
                                }
                            }

                            prev_layout_idx = layout_idx;
                        } else { // layout entry must not be NULL
#ifndef NDEBUG
                            if constexpr (IsStore) {
                                /*----- Check that value is also not NULL. -----*/
                                auto check = overloaded{
                                    [&]<sql_type T>() {
                                        BLOCK_OPEN(stores) {
                                            Wasm_insist(env.get<T>(layout_entry.id).not_null(),
                                                        "value of non-nullable entry must not be nullable");
                                        }
                                    },
                                    []<typename>() {
                                        M_unreachable("invalid type for given number of SIMD lanes");
                                    }
                                };
                                visit(overloaded {
                                    [&](const Boolean&) { check.template operator()<_Bool<L>>(); },
                                    [&](const Numeric &n) {
                                        switch (n.kind) {
                                            case Numeric::N_Int:
                                            case Numeric::N_Decimal:
                                                switch (n.size()) {
                                                    default: M_unreachable("invalid size");
                                                    case 8:  check.template operator()<_I8 <L>>(); break;
                                                    case 16: check.template operator()<_I16<L>>(); break;
                                                    case 32: check.template operator()<_I32<L>>(); break;
                                                    case 64: check.template operator()<_I64<L>>(); break;
                                                }
                                                break;
                                            case Numeric::N_Float:
                                                if (n.size() <= 32)
                                                    check.template operator()<_Float<L>>();
                                                else
                                                    check.template operator()<_Double<L>>();
                                        }
                                    },
                                    [&](const CharacterSequence&) { check.template operator()<NChar>(); },
                                    [&](const Date&) { check.template operator()<_I32<L>>(); },
                                    [&](const DateTime&) { check.template operator()<_I64<L>>(); },
                                    [](auto&&) { M_unreachable("invalid type"); },
                                }, *layout_entry.type);
                            }
#endif
                        }
                    }

                    /*----- Final advancement of the pointer and mask to match the leaf's stride. -----*/
                    /* This is done here (and not together with the other stride jumps further below) because we have
                     * already advanced by `prev_layout_idx` bits and thus only need to advance by the remaining
                     * `delta` bits. */
                    const auto delta = leaf_info.stride_in_bits - prev_layout_idx;
                    const uint8_t bit_delta = delta % 8;
                    const int32_t byte_delta = delta / 8;
                    if (bit_delta) {
                        BLOCK_OPEN(jumps) {
                            if (is_predicated) {
                                M_insist(bool(pred));
                                *null_bitmap_mask <<= Select(*pred, bit_delta, uint8_t(0)); // possibly advance mask
                            } else {
                                *null_bitmap_mask <<= bit_delta; // advance mask
                            }
                            /* If the mask surpasses the first byte, advance pointer to the next byte... */
                            *null_bitmap_ptr += (*null_bitmap_mask bitand 0xffU).eqz().template to<int32_t>();
                            /* ... and remove the lowest byte from the mask. */
                            *null_bitmap_mask = Select((*null_bitmap_mask bitand 0xffU).eqz(),
                                                       *null_bitmap_mask >> 8U, *null_bitmap_mask);
                        }
                    }
                    if (byte_delta) {
                        BLOCK_OPEN(jumps) {
                            if (is_predicated) {
                                M_insist(bool(pred));
                                *null_bitmap_ptr += Select(*pred, byte_delta, 0); // possibly advance pointer
                            } else {
                                *null_bitmap_ptr += byte_delta; // advance pointer
                            }
                        }
                    }
                } else { // NULL bitmap without bit stride can benefit from static masking of NULL bits
                    M_insist(L == 1 or L >= 16,
                             "NULL bits must fill at least an entire SIMD vector when loading SIMDfied");
                    M_insist(L == 1 or tuple_value_schema.num_entries() <= 64,
                             "bytes containing a NULL bitmap must fit into scalar value when loading SIMDfied");
                    M_insist(L == 1 or
                             std::max(ceil_to_pow_2(tuple_value_schema.num_entries()), 8UL) == leaf_info.stride_in_bits,
                             "NULL bitmaps must be packed s.t. the distance between two NULL bits of a single "
                             "attribute is a power of 2 when loading SIMDfied");
                    M_insist(L == 1 or leaf_info.offset_in_bits % 8 == 0,
                             "NULL bitmaps must not start with bit offset when loading SIMDfied");

                    auto byte_offset = [&]() -> I32x1 {
                        if (inode_iter and leaf_info.stride_in_bits) {
                            /* omit `leaf_info.offset_in_bits` here to add it to the static offsets and masks;
                             * this is valid since no bit stride means that the leaf byte offset computation is
                             * independent of the static parts */
                            U64x1 leaf_offset_in_bits = *inode_iter * leaf_info.stride_in_bits;
                            U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
                            I32x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed().to<int32_t>(); // div 8
                            BLOCK_OPEN(inits) {
                                Wasm_insist(leaf_bit_offset == 0U, "no leaf bit offset without bit stride");
                            }
                            return *inode_byte_offset + leaf_byte_offset;
                        } else {
                            return *inode_byte_offset;
                        }
                    }();

                    key_t key(leaf_info.offset_in_bits % 8, leaf_info.stride_in_bits);
                    auto [it, inserted] =
                        M_CONSTEXPR_COND(PointerSharing,
                                         loading_context.try_emplace(std::move(key)),
                                         std::make_pair(loading_context.emplace(loading_context.end(), std::move(key), value_t()), true));
                    if (inserted) {
                        BLOCK_OPEN(inits) {
                            it->second.ptr = base_address.clone() + byte_offset;
                        }
                    } else {
                        byte_offset.discard();
                    }
                    const auto &ptr = it->second.ptr;

                    std::unordered_map<int32_t, Var<U8x1>> loaded_bytes;

                    using bytes_t = std::variant<std::monostate, Var<U8<L>>, Var<U16<L>>, Var<U32<L>>, Var<U64<L>>>;
                    bytes_t bytes;
                    if constexpr (not IsStore and L > 1) {
                        auto emplace = [&]<typename T>() {
                            using type = typename T::type;
                            static constexpr std::size_t lanes = T::num_simd_lanes;
                            BLOCK_OPEN(loads) {
                                bytes.template emplace<Var<T>>(
                                    *(ptr + leaf_info.offset_in_bits / 8).template to<type*, lanes>()
                                );
                            }
                        };
                        switch (ceil_to_pow_2(tuple_value_schema.num_entries())) {
                            default: emplace.template operator()<U8 <L>>(); break; // <= 8
                            case 16: emplace.template operator()<U16<L>>(); break;
                            case 32: emplace.template operator()<U32<L>>(); break;
                            case 64: emplace.template operator()<U64<L>>(); break;
                        }
                    }

                    /*----- For each tuple entry that can be NULL, create a store/load with static offset and mask. --*/
                    for (std::size_t tuple_idx = 0; tuple_idx != tuple_value_schema.num_entries(); ++tuple_idx) {
                        auto &tuple_entry = tuple_value_schema[tuple_idx];
                        const auto &[layout_idx, layout_entry] = layout_schema[tuple_entry.id];
                        M_insist(*tuple_entry.type == *layout_entry.type);
                        if (layout_entry.nullable()) { // layout entry may be NULL
                            const uint8_t static_bit_offset  = (leaf_info.offset_in_bits + layout_idx) % 8;
                            const int32_t static_byte_offset = (leaf_info.offset_in_bits + layout_idx) / 8;
                            if constexpr (IsStore) {
                                /*----- Store NULL bit depending on its type. -----*/
                                auto store = overloaded{
                                    [&]<sql_type T>() {
                                        BLOCK_OPEN(stores) {
                                            auto [value, is_null] = env.get<T>(tuple_entry.id).split(); // get value
                                            value.discard(); // handled at entry leaf
                                            if constexpr (L == 1) {
                                                Ptr<U8x1> byte_ptr =
                                                    (ptr + static_byte_offset).template to<uint8_t*>(); // compute byte address
                                                setbit<U8x1>(byte_ptr, is_null, static_bit_offset); // update bit
                                            } else {
                                                auto store = [&, is_null, layout_idx]<typename U>() { // copy due to structured binding
                                                    using type = typename U::type;
                                                    static constexpr std::size_t lanes = U::num_simd_lanes;
                                                    Ptr<U> bytes_ptr =
                                                        (ptr + leaf_info.offset_in_bits / 8).template to<type*, lanes>(); // compute bytes address
                                                    setbit<U>(bytes_ptr, is_null, layout_idx); // update bits
                                                };
                                                switch (ceil_to_pow_2(tuple_value_schema.num_entries())) {
                                                    default: store.template operator()<U8 <L>>(); break; // <= 8
                                                    case 16: store.template operator()<U16<L>>(); break;
                                                    case 32: store.template operator()<U32<L>>(); break;
                                                    case 64: store.template operator()<U64<L>>(); break;
                                                }
                                            }
                                        }
                                    },
                                    []<typename>() {
                                        M_unreachable("invalid type for given number of SIMD lanes");
                                    }
                                };
                                visit(overloaded {
                                    [&](const Boolean&) { store.template operator()<_Bool<L>>(); },
                                    [&](const Numeric &n) {
                                        switch (n.kind) {
                                            case Numeric::N_Int:
                                            case Numeric::N_Decimal:
                                                switch (n.size()) {
                                                    default: M_unreachable("invalid size");
                                                    case 8:  store.template operator()<_I8 <L>>(); break;
                                                    case 16: store.template operator()<_I16<L>>(); break;
                                                    case 32: store.template operator()<_I32<L>>(); break;
                                                    case 64: store.template operator()<_I64<L>>(); break;
                                                }
                                                break;
                                            case Numeric::N_Float:
                                                if (n.size() <= 32)
                                                    store.template operator()<_Float<L>>();
                                                else
                                                    store.template operator()<_Double<L>>();
                                        }
                                    },
                                    [&](const CharacterSequence&) {
                                        M_insist(L == 1, "string SIMDfication currently not supported");
                                        BLOCK_OPEN(stores) {
                                            auto value = env.get<NChar>(tuple_entry.id); // get value
                                            Ptr<U8x1> byte_ptr =
                                                (ptr + static_byte_offset).template to<uint8_t*>(); // compute byte address
                                            setbit<U8x1>(byte_ptr, value.is_null(), static_bit_offset); // update bit
                                        }
                                    },
                                    [&](const Date&) { store.template operator()<_I32<L>>(); },
                                    [&](const DateTime&) { store.template operator()<_I64<L>>(); },
                                    [](auto&&) { M_unreachable("invalid type"); },
                                }, *tuple_entry.type);
                            } else {
                                /*----- Load NULL bit. -----*/
                                BLOCK_OPEN(loads) {
                                    if constexpr (L == 1) {
                                        auto [it, inserted] = loaded_bytes.try_emplace(static_byte_offset);
                                        if (inserted)
                                            it->second = *(ptr + static_byte_offset).template to<uint8_t*>(); // load the byte
                                        const auto &byte = it->second;
                                        const uint8_t static_mask = 1U << static_bit_offset;
                                        Var<Boolx1> value((byte bitand static_mask).to<bool>()); // mask bit with static mask
                                        new (&null_bits[tuple_idx]) Boolx1(value);
                                        /* Address for NULL bits not yet supported. */
                                    } else {
                                        std::visit(overloaded{
                                            [&, layout_idx]<typename T> // copy due to structured binding
                                            (const Var<PrimitiveExpr<T, L>> &_bytes)
                                            requires (L >= 16) {
                                                PrimitiveExpr<T, L> static_mask(1U << layout_idx);
                                                Var<Bool<L>> value(
                                                    (_bytes bitand static_mask).template to<bool>() // mask bits with static mask
                                                );
                                                new (&null_bits[tuple_idx]) Bool<L>(value);
                                                /* Address for NULL bits not yet supported. */
                                            },
                                            [](auto&) { M_unreachable("invalid number of SIMD lanes"); },
                                            [](std::monostate&) { M_unreachable("invalid variant"); },
                                        }, const_cast<const bytes_t&>(bytes));
                                    }
                                }
                            }
                        } else { // entry must not be NULL
#ifndef NDEBUG
                            if constexpr (IsStore) {
                                /*----- Check that value is also not NULL. -----*/
                                auto check = overloaded{
                                    [&, layout_entry]<sql_type T>() { // copy due to structured binding
                                        BLOCK_OPEN(stores) {
                                            Wasm_insist(env.get<T>(layout_entry.id).not_null(),
                                                        "value of non-nullable entry must not be nullable");
                                        }
                                    },
                                    []<typename>() {
                                        M_unreachable("invalid type for given number of SIMD lanes");
                                    }
                                };
                                visit(overloaded {
                                    [&](const Boolean&) { check.template operator()<_Bool<L>>(); },
                                    [&](const Numeric &n) {
                                        switch (n.kind) {
                                            case Numeric::N_Int:
                                            case Numeric::N_Decimal:
                                                switch (n.size()) {
                                                    default: M_unreachable("invalid size");
                                                    case 8:  check.template operator()<_I8 <L>>(); break;
                                                    case 16: check.template operator()<_I16<L>>(); break;
                                                    case 32: check.template operator()<_I32<L>>(); break;
                                                    case 64: check.template operator()<_I64<L>>(); break;
                                                }
                                                break;
                                            case Numeric::N_Float:
                                                if (n.size() <= 32)
                                                    check.template operator()<_Float<L>>();
                                                else
                                                    check.template operator()<_Double<L>>();
                                        }
                                    },
                                    [&](const CharacterSequence&) { check.template operator()<NChar>(); },
                                    [&](const Date&) { check.template operator()<_I32<L>>(); },
                                    [&](const DateTime&) { check.template operator()<_I64<L>>(); },
                                    [](auto&&) { M_unreachable("invalid type"); },
                                }, *tuple_entry.type);
                            }
#endif
                        }
                    }
                }
            } else { // regular entry
                auto &layout_entry = layout_schema[leaf_info.leaf.index()];
                M_insist(*layout_entry.type == *leaf_info.leaf.type());
                auto tuple_value_it = tuple_value_schema.find(layout_entry.id);
                auto tuple_addr_it = tuple_addr_schema.find(layout_entry.id);
                if (tuple_value_it == tuple_value_schema.end() and tuple_addr_it == tuple_addr_schema.end())
                    continue; // entry not contained in both tuple schemas
                auto tuple_it = tuple_value_it != tuple_value_schema.end() ? tuple_value_it : tuple_addr_it;
                M_insist(*tuple_it->type == *layout_entry.type);
                const auto tuple_value_idx = std::distance(tuple_value_schema.begin(), tuple_value_it);
                const auto tuple_addr_idx = std::distance(tuple_addr_schema.begin(), tuple_addr_it);

                if (bit_stride) { // entry with bit stride requires dynamic masking (for scalar loading)
                    M_insist(tuple_it->type->is_boolean(),
                             "leaf bit stride currently only supported for `Boolean`");
                    M_insist(L == 1 or L >= 16,
                             "booleans must fill at least an entire SIMD vector when loading SIMDfied");
                    M_insist(L <= 64, "bytes containing booleans must fit into scalar value when loading SIMDfied");
                    M_insist(L == 1 or leaf_info.stride_in_bits == 1,
                             "booleans must be packed consecutively when loading SIMDfied");

                    M_insist(bool(inode_iter), "stride requires repetition");
                    U64x1 leaf_offset_in_bits = leaf_info.offset_in_bits + *inode_iter * leaf_info.stride_in_bits;
                    U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
                    I32x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed().to<int32_t>(); // div 8

                    if constexpr (L > 1) {
                        BLOCK_OPEN(inits) {
                            Wasm_insist(leaf_bit_offset == 0U,
                                        "booleans must not start with bit offset when loading SIMDfied");
                        }
                    }

                    key_t key(leaf_info.offset_in_bits % 8, leaf_info.stride_in_bits);
                    auto [it, inserted] =
                        M_CONSTEXPR_COND(PointerSharing,
                                         loading_context.try_emplace(std::move(key)),
                                         std::make_pair(loading_context.emplace(loading_context.end(), std::move(key), value_t()), true));
                    M_insist(inserted == not it->second.mask);
                    if (inserted) {
                        BLOCK_OPEN(inits) {
                            /* do not add `leaf_byte_offset` to pointer here as it may be different for shared entries */
                            it->second.ptr = base_address.clone() + *inode_byte_offset;
                            it->second.mask.emplace(); // default-construct for globals to be able to use assignment below
                            if constexpr (L == 1)
                                *it->second.mask = 1U << leaf_bit_offset; // init mask for scalar loading
                            /* no dynamic mask required for SIMDfied loading */
                        }
                    } else {
                        leaf_bit_offset.discard();
                    }
                    const auto &ptr = it->second.ptr;

                    if constexpr (IsStore) {
                        if constexpr (sql_type<_Bool<L>>) {
                            /*----- Store value. -----*/
                            BLOCK_OPEN(stores) {
                                auto [value, is_null] = env.get<_Bool<L>>(tuple_it->id).split(); // get value
                                is_null.discard(); // handled at NULL bitmap leaf
                                if constexpr (L == 1) {
                                    Ptr<U8x1> byte_ptr =
                                        (ptr + leaf_byte_offset).template to<uint8_t*>(); // compute byte address
                                    const auto &mask = *it->second.mask;
                                    setbit(byte_ptr, value, mask.template to<uint8_t>()); // update bit
                                } else {
                                    using bytes_t = uint_t<L / 8>;
                                    Ptr<PrimitiveExpr<bytes_t>> bytes_ptr =
                                        (ptr + leaf_byte_offset).template to<bytes_t*>(); // compute bytes address
                                    *bytes_ptr = value.bitmask().template to<bytes_t>(); // update all bits at once
                                }
                            }
                        } else {
                            M_unreachable("invalid type for given number of SIMD lanes");
                        }
                    } else {
                        if constexpr (sql_type<_Bool<L>>) {
                            /*----- Load value. -----*/
                            BLOCK_OPEN(loads) {
                                if constexpr (L == 1) {
                                    U8x1 byte = *(ptr + leaf_byte_offset).template to<uint8_t*>(); // load byte
                                    const auto &mask = *it->second.mask;
                                    if (tuple_value_it != tuple_value_schema.end()) {
                                        Var<Boolx1> value(
                                            (byte.clone() bitand mask.template to<uint8_t>()).template to<bool>() // mask bit with dynamic mask
                                        );
                                        new (&values[tuple_value_idx]) SQL_t(_Boolx1(value));
                                    }
                                    /* Address for booleans not yet supported. */
                                    byte.discard();
                                } else {
                                    using bytes_t = uint_t<L / 8>;
                                    const Var<PrimitiveExpr<bytes_t>> bytes(
                                        *(ptr + leaf_byte_offset).template to<bytes_t*>() // load bytes
                                    ); // create local variable to avoid cloning when broadcasting `bytes`
                                    auto create_mask = [&]<std::size_t... Is>(std::index_sequence<Is...>) {
                                        return PrimitiveExpr<bytes_t, L>(bytes_t(1UL << Is)...);
                                    };
                                    auto static_mask = create_mask(std::make_index_sequence<L>());
                                    if (tuple_value_it != tuple_value_schema.end()) {
                                        Var<Bool<L>> value(
                                            (bytes.template broadcast<L>() bitand static_mask).template to<bool>() // mask bits with static mask
                                        );
                                        new (&values[tuple_value_idx]) SQL_t(_Bool<L>(value));
                                    } else {
                                        bytes.val().discard(); // XXX: remove once addresses for booleans are supported
                                    }
                                    /* Address for booleans not yet supported. */
                                }
                            }
                        } else {
                            M_unreachable("invalid type for given number of SIMD lanes");
                        }
                    }
                } else { // entry without bit stride; if masking is required, we can use a static mask
                    auto byte_offset = [&]() -> I32x1 {
                        if (inode_iter and leaf_info.stride_in_bits) {
                            /* omit `leaf_info.offset_in_bits` here to use it as static offset and mask;
                             * this is valid since no bit stride means that the leaf byte offset computation is
                             * independent of the static parts */
                            U64x1 leaf_offset_in_bits = *inode_iter * leaf_info.stride_in_bits;
                            U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
                            I32x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed().to<int32_t>(); // div 8
                            BLOCK_OPEN(inits) {
                                Wasm_insist(leaf_bit_offset == 0U, "no leaf bit offset without bit stride");
                            }
                            return *inode_byte_offset + leaf_byte_offset;
                        } else {
                            return *inode_byte_offset;
                        }
                    }();

                    const uint8_t static_bit_offset  = leaf_info.offset_in_bits % 8;
                    const int32_t static_byte_offset = leaf_info.offset_in_bits / 8;

                    key_t key(leaf_info.offset_in_bits % 8, leaf_info.stride_in_bits);
                    auto [it, inserted] =
                        M_CONSTEXPR_COND(PointerSharing,
                                         loading_context.try_emplace(std::move(key)),
                                         std::make_pair(loading_context.emplace(loading_context.end(), std::move(key), value_t()), true));
                    if (inserted) {
                        BLOCK_OPEN(inits) {
                            it->second.ptr = base_address.clone() + byte_offset;
                        }
                    } else {
                        byte_offset.discard();
                    }
                    const auto &ptr = it->second.ptr;

                    /*----- Store value depending on its type. -----*/
                    auto store = overloaded{
                        [&]<sql_type T>() {
                            using type = typename T::type;
                            static constexpr std::size_t lanes = T::num_simd_lanes;
                            M_insist(static_bit_offset == 0,
                                     "leaf offset of `Numeric`, `Date`, or `DateTime` must be byte aligned");
                            BLOCK_OPEN(stores) {
                                auto [value, is_null] = env.get<T>(tuple_it->id).split(); // get value
                                is_null.discard(); // handled at NULL bitmap leaf
                                *(ptr + static_byte_offset).template to<type*, lanes>() = value;
                            }
                        },
                        []<typename>() {
                            M_unreachable("invalid type for given number of SIMD lanes");
                        }
                    };
                    /*----- Load value depending on its type. -----*/
                    auto load = overloaded{
                        [&]<sql_type T>() {
                            using type = typename T::type;
                            static constexpr std::size_t lanes = T::num_simd_lanes;
                            M_insist(static_bit_offset == 0,
                                     "leaf offset of `Numeric`, `Date`, or `DateTime` must be byte aligned");
                            BLOCK_OPEN(loads) {
                                if (tuple_value_it != tuple_value_schema.end()) {
                                    Var<PrimitiveExpr<type, lanes>> value(
                                        *(ptr + static_byte_offset).template to<type*, lanes>()
                                    );
                                    new (&values[tuple_value_idx]) SQL_t(T(value));
                                }
                                if (tuple_addr_it != tuple_addr_schema.end())
                                    new (&addrs[tuple_addr_idx]) SQL_addr_t(
                                        (ptr + static_byte_offset).template to<type*, lanes>()
                                    );
                            }
                        },
                        []<typename>() {
                            M_unreachable("invalid type for given number of SIMD lanes");
                        }
                    };
                    /*----- Select call target (store or load) and visit attribute type. -----*/
#define CALL(TYPE) if constexpr (IsStore) store.template operator()<TYPE>(); else load.template operator()<TYPE>()
                    visit(overloaded {
                        [&](const Boolean&) {
                            M_insist(L == 1 or leaf_info.stride_in_bits == 8,
                                     "booleans must be packed consecutively in bytes when loading SIMDfied");
                            if constexpr (sql_type<_Bool<L>>) {
                                if constexpr (IsStore) {
                                    /*----- Store value. -----*/
                                    BLOCK_OPEN(stores) {
                                        auto [value, is_null] = env.get<_Bool<L>>(tuple_it->id).split(); // get value
                                        is_null.discard(); // handled at NULL bitmap leaf
                                        Ptr<U8<L>> byte_ptr =
                                            (ptr + static_byte_offset).template to<uint8_t*, L>(); // compute byte address
                                        setbit<U8<L>>(byte_ptr, value, static_bit_offset); // update bit
                                    }
                                } else {
                                    /*----- Load value. -----*/
                                    BLOCK_OPEN(loads) {
                                        U8<L> byte =
                                            *(ptr + static_byte_offset).template to<uint8_t*, L>(); // load byte
                                        U8<L> static_mask(1U << static_bit_offset);

                                        if (tuple_value_it != tuple_value_schema.end()) {
                                            Var<Bool<L>> value(
                                                (byte.clone() bitand static_mask.clone()).template to<bool>() // mask bit with static mask
                                            );
                                            new (&values[tuple_value_idx]) SQL_t(_Bool<L>(value));
                                        }
                                        /* Address for booleans not yet supported. */
                                        byte.discard();
                                        static_mask.discard();
                                    }
                                }
                            } else {
                                M_unreachable("invalid type for given number of SIMD lanes");
                            }
                        },
                        [&](const Numeric &n) {
                            switch (n.kind) {
                                case Numeric::N_Int:
                                case Numeric::N_Decimal:
                                    switch (n.size()) {
                                        default: M_unreachable("invalid size");
                                        case 8:  CALL(_I8 <L>); break;
                                        case 16: CALL(_I16<L>); break;
                                        case 32: CALL(_I32<L>); break;
                                        case 64: CALL(_I64<L>); break;
                                    }
                                    break;
                                case Numeric::N_Float:
                                    if (n.size() <= 32)
                                        CALL(_Float<L>);
                                    else
                                        CALL(_Double<L>);
                            }
                        },
                        [&](const CharacterSequence &cs) {
                            M_insist(L == 1, "string SIMDfication currently not supported");
                            M_insist(static_bit_offset == 0, "leaf offset of `CharacterSequence` must be byte aligned");
                            if constexpr (IsStore) {
                                /*----- Store value. -----*/
                                BLOCK_OPEN(stores) {
                                    auto value = env.get<NChar>(tuple_it->id); // get value
                                    IF (value.clone().not_null()) {
                                        Ptr<Charx1> address((ptr + static_byte_offset).template to<char*>());
                                        strncpy(address, value, U32x1(cs.size() / 8)).discard();
                                    };
                                }
                            } else {
                                /*----- Load value. -----*/
                                BLOCK_OPEN(loads) {
                                    Ptr<Charx1> address((ptr + static_byte_offset).template to<char*>());
                                    new (&values[tuple_value_idx]) SQL_t(
                                        NChar(address, layout_entry.nullable(), cs.length, cs.is_varying)
                                    );
                                    /* Omit addresses for character sequences. */
                                }
                            }
                        },
                        [&](const Date&) { CALL(_I32<L>); },
                        [&](const DateTime&) { CALL(_I64<L>); },
                        [](auto&&) { M_unreachable("invalid type"); },
                    }, *tuple_it->type);
#undef CALL
                }
            }
        }

        /*----- Recursive lambda to emit stride jumps by processing path from leaves (excluding) to the root. -----*/
        auto emit_stride_jumps = [&](decltype(levels.crbegin()) curr, const decltype(levels.crend()) end) -> void {
            auto rec = [&](decltype(levels.crbegin()) curr, const decltype(levels.crend()) end, auto rec) -> void {
                if (curr == end) return;

                const auto inner = std::prev(curr); // the child INode of `curr`
                M_insist(curr->num_tuples % inner->num_tuples == 0, "curr must be a whole multiple of inner");

                /*----- Compute remaining stride for this level. -----*/
                const auto num_repetition_inner = curr->num_tuples / inner->num_tuples;
                const auto stride_remaining_in_bits = curr->stride_in_bits -
                                                      num_repetition_inner * inner->stride_in_bits;
                M_insist(stride_remaining_in_bits % 8 == 0,
                         "remaining stride of INodes must be a whole multiple of a byte");

                /*----- If there is a remaining stride for this level, emit conditional stride jump. -----*/
                if (const int32_t remaining_stride_in_bytes = stride_remaining_in_bits / 8) [[likely]] {
                    M_insist(curr->num_tuples > 0);
                    if (curr->num_tuples != 1U) {
                        Boolx1 cond_mod = (tuple_id % uint32_t(curr->num_tuples)).eqz();
                        Boolx1 cond_and = (tuple_id bitand uint32_t(curr->num_tuples - 1U)).eqz();
                        const bool use_and = is_pow_2(curr->num_tuples) and options::remainder_removal;
                        Boolx1 cond = use_and ? cond_and : cond_mod; // select implementation to use...
                        (use_and ? cond_mod : cond_and).discard(); // ... and discard the other

                        /*----- Emit conditional stride jumps. -----*/
                        IF (cond) {
                            for (auto &[_, value] : loading_context) {
                                if (is_predicated) {
                                    M_insist(bool(pred));
                                    value.ptr += Select(*pred, remaining_stride_in_bytes, 0); // possibly emit stride jump
                                } else {
                                    value.ptr += remaining_stride_in_bytes; // emit stride jump
                                }
                            }
                            if (null_bitmap_ptr) {
                                if (is_predicated) {
                                    M_insist(bool(pred));
                                    *null_bitmap_ptr +=
                                        Select(*pred, remaining_stride_in_bytes, 0); // possibly emit stride jump
                                } else {
                                    *null_bitmap_ptr += remaining_stride_in_bytes; // emit stride jump
                                }
                            }

                            /*----- Recurse within IF. -----*/
                            rec(std::next(curr), end, rec);
                        };
                    } else {
                        for (auto &[_, value] : loading_context) {
                            if (is_predicated) {
                                M_insist(bool(pred));
                                value.ptr += Select(*pred, remaining_stride_in_bytes, 0); // possibly emit stride jump
                            } else {
                                value.ptr += remaining_stride_in_bytes; // emit stride jump
                            }
                        }
                        if (null_bitmap_ptr) {
                            if (is_predicated) {
                                M_insist(bool(pred));
                                *null_bitmap_ptr +=
                                    Select(*pred, remaining_stride_in_bytes, 0); // possibly emit stride jump
                            } else {
                                *null_bitmap_ptr += remaining_stride_in_bytes; // emit stride jump
                            }
                        }

                        /*----- Recurse without IF. -----*/
                        rec(std::next(curr), end, rec);
                    }
                } else {
                    /*----- Recurse without IF. -----*/
                    rec(std::next(curr), end, rec);
                }
            };
            rec(curr, end, rec);
        };
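
        /* Stride-jump example: consider an INode packing 128 tuples per child with some padding at the end. After the
         * per-leaf jumps, the code above adds the remaining padding exactly when the incremented tuple ID crosses a
         * child boundary, i.e. when `tuple_id % 128 == 0`; with remainder removal enabled and a power-of-two count
         * this compiles to the cheaper `(tuple_id & 127) == 0`. */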
1513
1514 /*----- Process path from DataLayout leaves to the root to emit stride jumps. -----*/
1515 BLOCK_OPEN(jumps) {
1516 /*----- Emit the per-leaf stride jumps, i.e. from one instance of the leaf to the next. -----*/
1517 for (auto &[key, value] : loading_context) {
1518 const uint8_t bit_stride = key.second % 8;
1519 const int32_t byte_stride = key.second / 8;
1520 if (bit_stride) {
1521 M_insist(L == 1);
1522 M_insist(bool(value.mask));
1523 if (is_predicated) {
1524 M_insist(bool(pred));
1525 *value.mask <<= Select(*pred, bit_stride, uint8_t(0)); // possibly advance mask
1526 } else {
1527 *value.mask <<= bit_stride; // advance mask
1528 }
1529 /* If the mask surpasses the first byte, advance pointer to the next byte... */
1530 value.ptr += (*value.mask bitand 0xffU).eqz().template to<int32_t>();
1531 /* ... and remove the lowest byte from the mask. */
1532 *value.mask = Select((*value.mask bitand 0xffU).eqz(), *value.mask >> 8U, *value.mask);
1533 }
1534 if (byte_stride) [[likely]] {
1535 if (is_predicated) {
1536 M_insist(L == 1);
1537 M_insist(bool(pred));
1538 value.ptr += Select(*pred, byte_stride, 0); // possibly advance pointer
1539 } else {
1540 value.ptr += int32_t(L) * byte_stride; // advance pointer
1541 }
1542 }
1543 }
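            /* Example of the bit-stride handling above (L == 1, bit_stride == 1): starting from mask 0x01,
             * eight advances shift the mask to 0x100; then `(mask & 0xff) == 0` holds, the pointer advances
             * by one byte, and the mask is shifted back down to 0x01.  This realizes bit-granular iteration
             * on top of byte-granular pointers. */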
1544 /* Omit the leaf stride jump for the NULL bitmap as it is already done together with the loading. */
1545
1546 if (not levels.empty()) {
1547 /*----- Emit the stride jumps between each leaf to the beginning of the parent INode. -----*/
1548 Block lowest_inode_jumps(false);
1549 for (auto &[key, value] : loading_context) {
1550                M_insist(levels.back().stride_in_bits % 8 == 0,
1551                         "stride of INodes must be a whole multiple of a byte");
1552 const auto stride_remaining_in_bits = levels.back().stride_in_bits -
1553 levels.back().num_tuples * key.second;
1554 const uint8_t remaining_bit_stride = stride_remaining_in_bits % 8;
1555 const int32_t remaining_byte_stride = stride_remaining_in_bits / 8;
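                    /* Worked example: an INode with a stride of 128 bits holding 10 tuples of a 12-bit leaf
                     * leaves 128 - 10 * 12 = 8 remaining bits, i.e. a remaining byte stride of 1 and no
                     * remaining bit stride. */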
1556 if (remaining_bit_stride) {
1557 M_insist(L == 1);
1558 M_insist(bool(value.mask));
1559 BLOCK_OPEN(lowest_inode_jumps) {
1560 const uint8_t end_bit_offset = (key.first + levels.back().num_tuples * key.second) % 8;
1561 M_insist(end_bit_offset != key.first);
1562 /* Reset the mask to initial bit offset... */
1563 if (is_predicated) {
1564 M_insist(bool(pred));
1565 Wasm_insist(*pred or *value.mask == 1U << key.first,
1566 "if the predicate is not fulfilled, the mask should not be advanced");
1567 }
1568 *value.mask = 1U << key.first;
1569                        /* ... and advance the pointer to the next byte if resetting the mask surpasses the current byte. */
1570 if (is_predicated) {
1571 M_insist(bool(pred));
1572 value.ptr += Select(*pred, int32_t(end_bit_offset > key.first), 0);
1573 } else {
1574 value.ptr += int32_t(end_bit_offset > key.first);
1575 }
1576 }
1577 }
1578 if (remaining_byte_stride) [[likely]] {
1579 BLOCK_OPEN(lowest_inode_jumps) {
1580 if (is_predicated) {
1581 M_insist(bool(pred));
1582 value.ptr +=
1583 Select(*pred, remaining_byte_stride, 0); // possibly advance pointer
1584 } else {
1585 value.ptr += remaining_byte_stride; // advance pointer
1586 }
1587 }
1588 }
1589 }
1590 if (null_bitmap_ptr) {
1591 M_insist(L == 1);
1592 M_insist(bool(null_bitmap_mask));
1593            M_insist(levels.back().stride_in_bits % 8 == 0,
1594                     "stride of INodes must be a whole multiple of a byte");
1595 const auto stride_remaining_in_bits = levels.back().stride_in_bits -
1596 levels.back().num_tuples * null_bitmap_stride_in_bits;
1597 const uint8_t remaining_bit_stride = stride_remaining_in_bits % 8;
1598 const int32_t remaining_byte_stride = stride_remaining_in_bits / 8;
1599 if (remaining_bit_stride) {
1600 BLOCK_OPEN(lowest_inode_jumps) {
1601 const uint8_t end_bit_offset =
1602 (null_bitmap_bit_offset + levels.back().num_tuples * null_bitmap_stride_in_bits) % 8;
1603 M_insist(end_bit_offset != null_bitmap_bit_offset);
1604 /* Reset the mask to initial bit offset... */
1605 if (is_predicated) {
1606 M_insist(bool(pred));
1607 Wasm_insist(*pred or *null_bitmap_mask == 1U << null_bitmap_bit_offset,
1608 "if the predicate is not fulfilled, the mask should not be advanced");
1609 }
1610 *null_bitmap_mask = 1U << null_bitmap_bit_offset;
1611                    /* ... and advance the pointer to the next byte if resetting the mask surpasses the current byte. */
1612 if (is_predicated) {
1613 M_insist(bool(pred));
1614 *null_bitmap_ptr +=
1615 Select(*pred, int32_t(end_bit_offset > null_bitmap_bit_offset), 0);
1616 } else {
1617 *null_bitmap_ptr += int32_t(end_bit_offset > null_bitmap_bit_offset);
1618 }
1619 }
1620 }
1621 if (remaining_byte_stride) [[likely]] {
1622 BLOCK_OPEN(lowest_inode_jumps) {
1623 if (is_predicated) {
1624 M_insist(bool(pred));
1625 *null_bitmap_ptr +=
1626 Select(*pred, remaining_byte_stride, 0); // possibly advance pointer
1627 } else {
1628 *null_bitmap_ptr += remaining_byte_stride; // advance pointer
1629 }
1630 }
1631 }
1632 }
1633
1634 /*----- Emit the stride jumps between all INodes starting at the parent of leaves to the root. -----*/
1635 if (not lowest_inode_jumps.empty()) [[likely]] {
1636 M_insist(levels.back().num_tuples > 0);
1637 if (levels.back().num_tuples != 1U) {
1638 Boolx1 cond_mod = (tuple_id % uint32_t(levels.back().num_tuples)).eqz();
1639 Boolx1 cond_and = (tuple_id bitand uint32_t(levels.back().num_tuples - 1U)).eqz();
1640 const bool use_and = is_pow_2(levels.back().num_tuples) and options::remainder_removal;
1641 Boolx1 cond = use_and ? cond_and : cond_mod; // select implementation to use...
1642 (use_and ? cond_mod : cond_and).discard(); // ... and discard the other
1643
1644 /*----- Emit conditional stride jumps from outermost Block. -----*/
1645 IF (cond) {
1646 lowest_inode_jumps.attach_to_current();
1647
1648 /*----- Recurse within IF. -----*/
1649 emit_stride_jumps(std::next(levels.crbegin()), levels.crend());
1650 };
1651 } else {
1652 lowest_inode_jumps.attach_to_current();
1653
1654                /*----- Recurse without IF. -----*/
1655 emit_stride_jumps(std::next(levels.crbegin()), levels.crend());
1656 }
1657 } else {
1658 /*----- Recurse without outermost IF block. -----*/
1659 emit_stride_jumps(std::next(levels.crbegin()), levels.crend());
1660 }
1661 }
1662 }
1663 });
1664
1665 if constexpr (not IsStore) {
1666    /*----- Combine actual values and possible NULL bits into a new `SQL_t` and add it to the environment. -----*/
1667 for (std::size_t idx = 0; idx != tuple_value_schema.num_entries(); ++idx) {
1668 auto &tuple_entry = tuple_value_schema[idx];
1669 std::visit(overloaded{
1670 [&]<typename T>(Expr<T, L> value) {
1671 BLOCK_OPEN(loads) {
1672 if (has_null_bitmap and layout_schema[tuple_entry.id].second.nullable()) {
1673 Expr<T, L> combined(value.insist_not_null(), null_bits[idx]);
1674 env.add(tuple_entry.id, combined);
1675 } else {
1676 env.add(tuple_entry.id, value);
1677 }
1678 }
1679 },
1680 [&](NChar value) {
1681 if constexpr (L == 1) {
1682 BLOCK_OPEN(loads) {
1683 if (has_null_bitmap and layout_schema[tuple_entry.id].second.nullable()) {
1684 /* introduce variable s.t. uses only load from it */
1685 Var<Ptr<Charx1>> combined(Select(null_bits[idx], Ptr<Charx1>::Nullptr(), value.val()));
1686 env.add(tuple_entry.id, NChar(combined, /* can_be_null=*/ true, value.length(),
1687 value.guarantees_terminating_nul()));
1688 } else {
1689 Var<Ptr<Charx1>> _value(value.val()); // introduce variable s.t. uses only load from it
1690 env.add(tuple_entry.id, NChar(_value, /* can_be_null=*/ false, value.length(),
1691 value.guarantees_terminating_nul()));
1692 }
1693 }
1694 } else {
1695 M_unreachable("string SIMDfication currently not supported");
1696 }
1697 },
1698 [](auto) { M_unreachable("value must be loaded beforehand"); },
1699 [](std::monostate) { M_unreachable("invalid variant"); },
1700 }, values[idx]);
1701 }
1702
1703 /*----- Add addresses to the environment. -----*/
1704 for (std::size_t idx = 0; idx != tuple_addr_schema.num_entries(); ++idx) {
1705 BLOCK_OPEN(loads) {
1706 auto &tuple_entry = tuple_addr_schema[idx];
1707 env.add_addr(tuple_entry.id, std::move(addrs[idx]));
1708 }
1709 }
1710 }
1711
1712 /*----- Destroy created values. -----*/
1713 for (std::size_t idx = 0; idx < tuple_value_schema.num_entries(); ++idx)
1714 values[idx].~SQL_t();
1715 for (std::size_t idx = 0; idx < tuple_addr_schema.num_entries(); ++idx)
1716 addrs[idx].~SQL_addr_t();
1717 if constexpr (not IsStore) {
1718 /*----- Destroy created NULL bits. -----*/
1719 for (std::size_t idx = 0; idx != tuple_value_schema.num_entries(); ++idx) {
1720 if (has_null_bitmap and layout_schema[tuple_value_schema[idx].id].second.nullable())
1721 null_bits[idx].~Bool<L>();
1722 }
1723 }
1724 base_address.discard(); // discard base address (as it was always cloned)
1725
1726#ifndef NDEBUG
1727 if constexpr (IsStore)
1728 M_insist(loads.empty());
1729 else
1730 M_insist(stores.empty());
1731#endif
1732
1733 if constexpr (IsStore)
1734 return std::make_tuple<Block, Block, Block>(std::move(inits), std::move(stores), std::move(jumps));
1735 else
1736 return std::make_tuple<Block, Block, Block>(std::move(inits), std::move(loads), std::move(jumps));
1737}
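
/* A minimal usage sketch for the blocks returned by the `compile_*_sequential()` wrappers below (assumed
 * caller code, mirroring `Buffer::resume_pipeline()` further down in this file): `inits` is emitted once,
 * the loads are emitted once per loop iteration, and `jumps` advances all pointers to the next tuple:
 *
 *     auto [inits, loads, jumps] = compile_load_sequential(value_schema, addr_schema, base_address,
 *                                                          layout, 1, layout_schema, tuple_id);
 *     inits.attach_to_current();
 *     WHILE (tuple_id < num_tuples) {
 *         loads.attach_to_current();
 *         // ... consume the loaded values from the current environment ...
 *         jumps.attach_to_current();
 *     }
 */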
1738
1739}
1740
1741}
1742
1743template<VariableKind Kind>
1744std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block>
1745m::wasm::compile_store_sequential(const Schema &tuple_value_schema, const Schema &tuple_addr_schema,
1746 Ptr<void> base_address, const storage::DataLayout &layout, std::size_t num_simd_lanes,
1747                                  const Schema &layout_schema, Variable<uint32_t, Kind, false> &tuple_id)
1748{
1749 if (options::pointer_sharing) {
1750 switch (num_simd_lanes) {
1751 default: M_unreachable("unsupported number of SIMD lanes");
1752 case 1: return compile_data_layout_sequential<true, 1, false, true>(tuple_value_schema, tuple_addr_schema,
1753 base_address, layout, layout_schema,
1754 tuple_id);
1755 case 2: return compile_data_layout_sequential<true, 2, false, true>(tuple_value_schema, tuple_addr_schema,
1756 base_address, layout, layout_schema,
1757 tuple_id);
1758 case 4: return compile_data_layout_sequential<true, 4, false, true>(tuple_value_schema, tuple_addr_schema,
1759 base_address, layout, layout_schema,
1760 tuple_id);
1761 case 8: return compile_data_layout_sequential<true, 8, false, true>(tuple_value_schema, tuple_addr_schema,
1762 base_address, layout, layout_schema,
1763 tuple_id);
1764 case 16: return compile_data_layout_sequential<true, 16, false, true>(tuple_value_schema, tuple_addr_schema,
1765 base_address, layout, layout_schema,
1766 tuple_id);
1767 case 32: return compile_data_layout_sequential<true, 32, false, true>(tuple_value_schema, tuple_addr_schema,
1768 base_address, layout, layout_schema,
1769 tuple_id);
1770 }
1771 } else {
1772 switch (num_simd_lanes) {
1773 default: M_unreachable("unsupported number of SIMD lanes");
1774 case 1: return compile_data_layout_sequential<true, 1, false, false>(tuple_value_schema,
1775 tuple_addr_schema, base_address,
1776 layout, layout_schema, tuple_id);
1777 case 2: return compile_data_layout_sequential<true, 2, false, false>(tuple_value_schema,
1778 tuple_addr_schema, base_address,
1779 layout, layout_schema, tuple_id);
1780 case 4: return compile_data_layout_sequential<true, 4, false, false>(tuple_value_schema,
1781 tuple_addr_schema, base_address,
1782 layout, layout_schema, tuple_id);
1783 case 8: return compile_data_layout_sequential<true, 8, false, false>(tuple_value_schema,
1784 tuple_addr_schema, base_address,
1785 layout, layout_schema, tuple_id);
1786 case 16: return compile_data_layout_sequential<true, 16, false, false>(tuple_value_schema,
1787 tuple_addr_schema, base_address,
1788 layout, layout_schema, tuple_id);
1789 case 32: return compile_data_layout_sequential<true, 32, false, false>(tuple_value_schema,
1790 tuple_addr_schema, base_address,
1791 layout, layout_schema, tuple_id);
1792 }
1793 }
1794}
1795
1796template<VariableKind Kind>
1797std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block>
1798m::wasm::compile_store_sequential_single_pass(const Schema &tuple_value_schema, const Schema &tuple_addr_schema,
1799 Ptr<void> base_address, const storage::DataLayout &layout,
1800 std::size_t num_simd_lanes, const Schema &layout_schema,
1801                                              Variable<uint32_t, Kind, false> &tuple_id)
1802{
1803 if (options::pointer_sharing) {
1804 switch (num_simd_lanes) {
1805 default: M_unreachable("unsupported number of SIMD lanes");
1806 case 1: return compile_data_layout_sequential<true, 1, true, true>(tuple_value_schema, tuple_addr_schema,
1807 base_address, layout, layout_schema,
1808 tuple_id);
1809 case 2: return compile_data_layout_sequential<true, 2, true, true>(tuple_value_schema, tuple_addr_schema,
1810 base_address, layout, layout_schema,
1811 tuple_id);
1812 case 4: return compile_data_layout_sequential<true, 4, true, true>(tuple_value_schema, tuple_addr_schema,
1813 base_address, layout, layout_schema,
1814 tuple_id);
1815 case 8: return compile_data_layout_sequential<true, 8, true, true>(tuple_value_schema, tuple_addr_schema,
1816 base_address, layout, layout_schema,
1817 tuple_id);
1818 case 16: return compile_data_layout_sequential<true, 16, true, true>(tuple_value_schema, tuple_addr_schema,
1819 base_address, layout, layout_schema,
1820 tuple_id);
1821 case 32: return compile_data_layout_sequential<true, 32, true, true>(tuple_value_schema, tuple_addr_schema,
1822 base_address, layout, layout_schema,
1823 tuple_id);
1824 }
1825 } else {
1826 switch (num_simd_lanes) {
1827 default: M_unreachable("unsupported number of SIMD lanes");
1828 case 1: return compile_data_layout_sequential<true, 1, true, false>(tuple_value_schema,
1829 tuple_addr_schema, base_address,
1830 layout, layout_schema, tuple_id);
1831 case 2: return compile_data_layout_sequential<true, 2, true, false>(tuple_value_schema,
1832 tuple_addr_schema, base_address,
1833 layout, layout_schema, tuple_id);
1834 case 4: return compile_data_layout_sequential<true, 4, true, false>(tuple_value_schema,
1835 tuple_addr_schema, base_address,
1836 layout, layout_schema, tuple_id);
1837 case 8: return compile_data_layout_sequential<true, 8, true, false>(tuple_value_schema,
1838 tuple_addr_schema, base_address,
1839 layout, layout_schema, tuple_id);
1840 case 16: return compile_data_layout_sequential<true, 16, true, false>(tuple_value_schema,
1841 tuple_addr_schema, base_address,
1842 layout, layout_schema, tuple_id);
1843 case 32: return compile_data_layout_sequential<true, 32, true, false>(tuple_value_schema,
1844 tuple_addr_schema, base_address,
1845 layout, layout_schema, tuple_id);
1846 }
1847 }
1848}
1849
1850template<VariableKind Kind>
1851std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block>
1852m::wasm::compile_load_sequential(const Schema &tuple_value_schema, const Schema &tuple_addr_schema,
1853 Ptr<void> base_address, const storage::DataLayout &layout, std::size_t num_simd_lanes,
1854                                 const Schema &layout_schema, Variable<uint32_t, Kind, false> &tuple_id)
1855{
1856 if (options::pointer_sharing) {
1857 switch (num_simd_lanes) {
1858 default: M_unreachable("unsupported number of SIMD lanes");
1859 case 1: return compile_data_layout_sequential<false, 1, true, true>(tuple_value_schema, tuple_addr_schema,
1860 base_address, layout, layout_schema,
1861 tuple_id);
1862 case 2: return compile_data_layout_sequential<false, 2, true, true>(tuple_value_schema, tuple_addr_schema,
1863 base_address, layout, layout_schema,
1864 tuple_id);
1865 case 4: return compile_data_layout_sequential<false, 4, true, true>(tuple_value_schema, tuple_addr_schema,
1866 base_address, layout, layout_schema,
1867 tuple_id);
1868 case 8: return compile_data_layout_sequential<false, 8, true, true>(tuple_value_schema, tuple_addr_schema,
1869 base_address, layout, layout_schema,
1870 tuple_id);
1871 case 16: return compile_data_layout_sequential<false, 16, true, true>(tuple_value_schema, tuple_addr_schema,
1872 base_address, layout, layout_schema,
1873 tuple_id);
1874 case 32: return compile_data_layout_sequential<false, 32, true, true>(tuple_value_schema, tuple_addr_schema,
1875 base_address, layout, layout_schema,
1876 tuple_id);
1877 }
1878 } else {
1879 switch (num_simd_lanes) {
1880 default: M_unreachable("unsupported number of SIMD lanes");
1881 case 1: return compile_data_layout_sequential<false, 1, true, false>(tuple_value_schema,
1882 tuple_addr_schema, base_address,
1883 layout, layout_schema, tuple_id);
1884 case 2: return compile_data_layout_sequential<false, 2, true, false>(tuple_value_schema,
1885 tuple_addr_schema, base_address,
1886 layout, layout_schema, tuple_id);
1887 case 4: return compile_data_layout_sequential<false, 4, true, false>(tuple_value_schema,
1888 tuple_addr_schema, base_address,
1889 layout, layout_schema, tuple_id);
1890 case 8: return compile_data_layout_sequential<false, 8, true, false>(tuple_value_schema,
1891 tuple_addr_schema, base_address,
1892 layout, layout_schema, tuple_id);
1893 case 16: return compile_data_layout_sequential<false, 16, true, false>(tuple_value_schema,
1894 tuple_addr_schema, base_address,
1895 layout, layout_schema, tuple_id);
1896 case 32: return compile_data_layout_sequential<false, 32, true, false>(tuple_value_schema,
1897 tuple_addr_schema, base_address,
1898 layout, layout_schema, tuple_id);
1899 }
1900 }
1901}
1902
1903// explicit instantiations to prevent linker errors
1904template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential(
1905 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Var<U32x1>&
1906);
1907template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential(
1908 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Global<U32x1>&
1909);
1910template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential(
1911 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&,
1912    Variable<uint32_t, VariableKind::Param, false>&
1913);
1914template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential_single_pass(
1915 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Var<U32x1>&
1916);
1917template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential_single_pass(
1918 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Global<U32x1>&
1919);
1920template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential_single_pass(
1921 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&,
1922    Variable<uint32_t, VariableKind::Param, false>&
1923);
1924template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_load_sequential(
1925 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Var<U32x1>&
1926);
1927template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_load_sequential(
1928 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Global<U32x1>&
1929);
1930template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_load_sequential(
1931 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&,
1932    Variable<uint32_t, VariableKind::Param, false>&
1933);
1934
1935namespace m {
1936
1937namespace wasm {
1938
1939/** Compiles the data layout \p layout starting at memory address \p base_address and containing tuples of
1940 * schema \p layout_schema such that it accesses the single tuple with ID \p tuple_id.
1941 *
1942 * If \tparam IsStore, the values of the entries in \p tuple_value_schema are stored to their respective
1943 * locations in the data layout.  Otherwise, they are loaded and added to the current environment, as are
1944 * the addresses of the entries in \p tuple_addr_schema. */
1945template<bool IsStore>
1946void compile_data_layout_point_access(const Schema &_tuple_value_schema, const Schema &_tuple_addr_schema,
1947 Ptr<void> base_address, const storage::DataLayout &layout,
1948 const Schema &layout_schema, U32x1 tuple_id)
1949{
1950 const auto tuple_value_schema = _tuple_value_schema.deduplicate().drop_constants();
1951 const auto tuple_addr_schema = _tuple_addr_schema.deduplicate().drop_constants();
1952
1953 M_insist(tuple_value_schema.num_entries() != 0, "point access must access at least one tuple schema entry");
1954 M_insist(not IsStore or tuple_addr_schema.num_entries() == 0, "addresses are only computed for loads");
1955#ifndef NDEBUG
1956 for (auto &e : tuple_value_schema)
1957 M_insist(layout_schema.find(e.id) != layout_schema.cend(), "tuple value schema entry not found");
1958 for (auto &e : tuple_addr_schema) {
1959 auto it = layout_schema.find(e.id);
1960 M_insist(it != layout_schema.cend(), "tuple address schema entry not found");
1961 M_insist(not it->nullable(), "nullable tuple address schema entry not yet supported");
1962 M_insist(not it->type->is_boolean(), "boolean tuple address schema entry not yet supported");
1963 M_insist(not it->type->is_character_sequence(), "character sequence tuple address schema entry omitted");
1964 }
1965#endif
1966
1967    /// the values loaded for the entries in \p tuple_value_schema
1968 SQL_t values[tuple_value_schema.num_entries()];
1969    /// the addresses of the entries in \p tuple_addr_schema
1970 SQL_addr_t *addrs;
1971 if (not tuple_addr_schema.empty())
1972 addrs = static_cast<SQL_addr_t*>(alloca(sizeof(SQL_addr_t) * tuple_addr_schema.num_entries()));
1973    /// the NULL bit of each entry in \p tuple_value_schema
1974 Boolx1 *null_bits;
1975 if constexpr (not IsStore)
1976 null_bits = static_cast<Boolx1*>(alloca(sizeof(Boolx1) * tuple_value_schema.num_entries()));
1977
1978 auto &env = CodeGenContext::Get().env(); // the current codegen environment
1979
1980    /*----- Check whether any entry in `tuple_value_schema` can be NULL, i.e. whether we need the NULL bitmap. -----*/
1981 const bool needs_null_bitmap = [&]() {
1982 for (auto &tuple_entry : tuple_value_schema) {
1983 if (layout_schema[tuple_entry.id].second.nullable())
1984 return true; // found an entry in `tuple_value_schema` that can be NULL according to `layout_schema`
1985 }
1986 return false; // no attribute in `tuple_value_schema` can be NULL according to `layout_schema`
1987 }();
1988 bool has_null_bitmap = false; // indicates whether the data layout specifies a NULL bitmap
1989
1990 /*----- Visit the data layout. -----*/
1991 layout.for_sibling_leaves([&](const std::vector<DataLayout::leaf_info_t> &leaves,
1992 const DataLayout::level_info_stack_t &levels, uint64_t inode_offset_in_bits)
1993 {
1994 /*----- Compute INode pointer and INode iteration depending on the given tuple ID. -----*/
1995 auto compute_additional_inode_byte_offset = [&](U32x1 tuple_id) -> U64x1 {
1996 auto rec = [&](U32x1 curr_tuple_id, decltype(levels.cbegin()) curr, const decltype(levels.cend()) end,
1997 auto rec) -> U64x1
1998 {
1999 if (curr == end) {
2000 Wasm_insist(curr_tuple_id == tuple_id % uint32_t(levels.back().num_tuples));
2001 return U64x1(0);
2002 }
2003
2004 if (is_pow_2(curr->num_tuples)) {
2005 U32x1 child_iter = curr_tuple_id.clone() >> uint32_t(__builtin_ctzl(curr->num_tuples));
2006 U32x1 inner_tuple_id = curr_tuple_id bitand uint32_t(curr->num_tuples - 1U);
2007 M_insist(curr->stride_in_bits % 8 == 0, "INode stride must be byte aligned");
2008 U64x1 offset_in_bytes = child_iter * uint64_t(curr->stride_in_bits / 8);
2009 return offset_in_bytes + rec(inner_tuple_id, std::next(curr), end, rec);
2010 } else {
2011 U32x1 child_iter = curr_tuple_id.clone() / uint32_t(curr->num_tuples);
2012 U32x1 inner_tuple_id = curr_tuple_id % uint32_t(curr->num_tuples);
2013 M_insist(curr->stride_in_bits % 8 == 0, "INode stride must be byte aligned");
2014 U64x1 offset_in_bytes = child_iter * uint64_t(curr->stride_in_bits / 8);
2015 return offset_in_bytes + rec(inner_tuple_id, std::next(curr), end, rec);
2016 }
2017 };
2018 return rec(tuple_id.clone(), levels.cbegin(), levels.cend(), rec);
2019 };
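        /* Worked example: a single level with num_tuples = 1024 (a power of two) and a stride of 8192 bytes.
         * For tuple_id = 5000, child_iter = 5000 >> 10 = 4 and inner_tuple_id = 5000 & 1023 = 904, i.e. the
         * accessed tuple lives in the fifth child instance, which starts at byte offset 4 * 8192 = 32768. */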
2020 M_insist(inode_offset_in_bits % 8 == 0, "INode offset must be byte aligned");
2021 const Var<Ptr<void>> inode_ptr(
2022 base_address.clone()
2023 + int32_t(inode_offset_in_bits / 8)
2024 + compute_additional_inode_byte_offset(tuple_id.clone()).make_signed().template to<int32_t>()
2025 );
2026 std::optional<const Var<U32x1>> inode_iter;
2027 M_insist(levels.back().num_tuples != 0, "INode must be large enough for at least one tuple");
2028 if (levels.back().num_tuples != 1) {
2029 inode_iter.emplace(
2030 is_pow_2(levels.back().num_tuples) ? tuple_id bitand uint32_t(levels.back().num_tuples - 1U)
2031 : tuple_id % uint32_t(levels.back().num_tuples)
2032 );
2033 } else {
2034 /* omit computation of INode iteration since it is always the first iteration, i.e. equals 0 */
2035 tuple_id.discard();
2036 }
2037
2038 /*----- Iterate over sibling leaves, i.e. leaf children of a common parent INode, to emit code. -----*/
2039 for (auto &leaf_info : leaves) {
2040 const uint8_t bit_stride = leaf_info.stride_in_bits % 8;
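            /* A non-zero bit stride arises whenever a leaf's stride is not byte-aligned, e.g. a 1-bit
             * `Boolean` column or a NULL bitmap covering three nullable attributes (a stride of 3 bits);
             * such leaves require the dynamic masking below, whereas byte-aligned leaves can use a static
             * mask. */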
2041
2042 if (leaf_info.leaf.index() == layout_schema.num_entries()) { // NULL bitmap
2043 if (not needs_null_bitmap)
2044 continue;
2045
2046 M_insist(not has_null_bitmap, "at most one bitmap may be specified");
2047 has_null_bitmap = true;
2048 if (bit_stride) { // NULL bitmap with bit stride requires dynamic masking
2049 M_insist(bool(inode_iter), "stride requires repetition");
2050 U64x1 leaf_offset_in_bits = leaf_info.offset_in_bits + *inode_iter * leaf_info.stride_in_bits;
2051 const Var<U8x1> leaf_bit_offset(
2052 (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>() // mod 8
2053 );
2054 I32x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed().to<int32_t>(); // div 8
2055
2056 const Var<Ptr<void>> ptr(inode_ptr + leaf_byte_offset); // pointer to NULL bitmap
2057
2058 /*----- For each tuple entry that can be NULL, create a store/load with offset and mask. --*/
2059 for (std::size_t tuple_idx = 0; tuple_idx != tuple_value_schema.num_entries(); ++tuple_idx) {
2060 auto &tuple_entry = tuple_value_schema[tuple_idx];
2061 const auto &[layout_idx, layout_entry] = layout_schema[tuple_entry.id];
2062 M_insist(*tuple_entry.type == *layout_entry.type);
2063 if (layout_entry.nullable()) { // layout entry may be NULL
2064 U64x1 offset_in_bits = leaf_bit_offset + layout_idx;
2065                            U8x1 bit_offset = (offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
2066 I32x1 byte_offset = (offset_in_bits >> uint64_t(3)).make_signed().to<int32_t>(); // div 8
2067 if constexpr (IsStore) {
2068 /*----- Store NULL bit depending on its type. -----*/
2069 auto store = [&]<typename T>() {
2070 auto [value, is_null] = env.get<T>(tuple_entry.id).split(); // get value
2071 value.discard(); // handled at entry leaf
2072 Ptr<U8x1> byte_ptr =
2073 (ptr + byte_offset).template to<uint8_t*>(); // compute byte address
2074 setbit<U8x1>(byte_ptr, is_null, uint8_t(1) << bit_offset); // update bit
2075 };
2076                            visit(overloaded {
2077 [&](const Boolean&) { store.template operator()<_Boolx1>(); },
2078 [&](const Numeric &n) {
2079 switch (n.kind) {
2080 case Numeric::N_Int:
2081 case Numeric::N_Decimal:
2082 switch (n.size()) {
2083 default: M_unreachable("invalid size");
2084 case 8: store.template operator()<_I8x1 >(); break;
2085 case 16: store.template operator()<_I16x1>(); break;
2086 case 32: store.template operator()<_I32x1>(); break;
2087 case 64: store.template operator()<_I64x1>(); break;
2088 }
2089 break;
2090 case Numeric::N_Float:
2091 if (n.size() <= 32)
2092 store.template operator()<_Floatx1>();
2093 else
2094 store.template operator()<_Doublex1>();
2095 }
2096 },
2097 [&](const CharacterSequence&) {
2098 auto value = env.get<NChar>(tuple_entry.id); // get value
2099 Ptr<U8x1> byte_ptr =
2100 (ptr + byte_offset).template to<uint8_t*>(); // compute byte address
2101 setbit<U8x1>(byte_ptr, value.is_null(), uint8_t(1) << bit_offset); // update bit
2102 },
2103 [&](const Date&) { store.template operator()<_I32x1>(); },
2104 [&](const DateTime&) { store.template operator()<_I64x1>(); },
2105 [](auto&&) { M_unreachable("invalid type"); },
2106 }, *tuple_entry.type);
2107 } else {
2108 /*----- Load NULL bit. -----*/
2109 U8x1 byte = *(ptr + byte_offset).template to<uint8_t*>(); // load the byte
2110 Var<Boolx1> value((byte bitand (uint8_t(1) << bit_offset)).to<bool>()); // mask bit
2111 new (&null_bits[tuple_idx]) Boolx1(value);
2112 /* Address for NULL bits not yet supported. */
2113 }
2114 } else { // entry must not be NULL
2115#ifndef NDEBUG
2116 if constexpr (IsStore) {
2117 /*----- Check that value is also not NULL. -----*/
2118 auto check = [&]<typename T>() {
2119 Wasm_insist(env.get<T>(tuple_entry.id).not_null(),
2120 "value of non-nullable entry must not be nullable");
2121 };
2122                            visit(overloaded {
2123 [&](const Boolean&) { check.template operator()<_Boolx1>(); },
2124 [&](const Numeric &n) {
2125 switch (n.kind) {
2126 case Numeric::N_Int:
2127 case Numeric::N_Decimal:
2128 switch (n.size()) {
2129 default: M_unreachable("invalid size");
2130 case 8: check.template operator()<_I8x1 >(); break;
2131 case 16: check.template operator()<_I16x1>(); break;
2132 case 32: check.template operator()<_I32x1>(); break;
2133 case 64: check.template operator()<_I64x1>(); break;
2134 }
2135 break;
2136 case Numeric::N_Float:
2137 if (n.size() <= 32)
2138 check.template operator()<_Floatx1>();
2139 else
2140 check.template operator()<_Doublex1>();
2141 }
2142 },
2143 [&](const CharacterSequence&) { check.template operator()<NChar>(); },
2144 [&](const Date&) { check.template operator()<_I32x1>(); },
2145 [&](const DateTime&) { check.template operator()<_I64x1>(); },
2146 [](auto&&) { M_unreachable("invalid type"); },
2147 }, *tuple_entry.type);
2148 }
2149#endif
2150 }
2151 }
2152 } else { // NULL bitmap without bit stride can benefit from static masking of NULL bits
2153 auto ptr = [&]() -> Ptr<void> {
2154 if (inode_iter and leaf_info.stride_in_bits) {
2155 /* omit `leaf_info.offset_in_bits` here to add it to the static offsets and masks;
2156 * this is valid since no bit stride means that the leaf byte offset computation is
2157 * independent of the static parts */
2158 U64x1 leaf_offset_in_bits = *inode_iter * leaf_info.stride_in_bits;
2159 U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
2160 I32x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed().to<int32_t>(); // div 8
2161 Wasm_insist(leaf_bit_offset == 0U, "no leaf bit offset without bit stride");
2162 const Var<Ptr<void>> ptr(inode_ptr + leaf_byte_offset);
2163 return ptr;
2164 } else {
2165 return inode_ptr;
2166 }
2167 }(); // pointer to NULL bitmap
2168
2169 /*----- For each tuple entry that can be NULL, create a store/load with offset and mask. --*/
2170 for (std::size_t tuple_idx = 0; tuple_idx != tuple_value_schema.num_entries(); ++tuple_idx) {
2171 auto &tuple_entry = tuple_value_schema[tuple_idx];
2172 const auto &[layout_idx, layout_entry] = layout_schema[tuple_entry.id];
2173 M_insist(*tuple_entry.type == *layout_entry.type);
2174 if (layout_entry.nullable()) { // layout entry may be NULL
2175 const uint8_t static_bit_offset = (leaf_info.offset_in_bits + layout_idx) % 8;
2176 const int32_t static_byte_offset = (leaf_info.offset_in_bits + layout_idx) / 8;
2177 if constexpr (IsStore) {
2178 /*----- Store NULL bit depending on its type. -----*/
2179 auto store = [&]<typename T>() {
2180 auto [value, is_null] = env.get<T>(tuple_entry.id).split(); // get value
2181 value.discard(); // handled at entry leaf
2182 Ptr<U8x1> byte_ptr =
2183 (ptr.clone() + static_byte_offset).template to<uint8_t*>(); // compute byte address
2184 setbit<U8x1>(byte_ptr, is_null, static_bit_offset); // update bit
2185 };
2186                        visit(overloaded {
2187 [&](const Boolean&) { store.template operator()<_Boolx1>(); },
2188 [&](const Numeric &n) {
2189 switch (n.kind) {
2190 case Numeric::N_Int:
2191 case Numeric::N_Decimal:
2192 switch (n.size()) {
2193 default: M_unreachable("invalid size");
2194 case 8: store.template operator()<_I8x1 >(); break;
2195 case 16: store.template operator()<_I16x1>(); break;
2196 case 32: store.template operator()<_I32x1>(); break;
2197 case 64: store.template operator()<_I64x1>(); break;
2198 }
2199 break;
2200 case Numeric::N_Float:
2201 if (n.size() <= 32)
2202 store.template operator()<_Floatx1>();
2203 else
2204 store.template operator()<_Doublex1>();
2205 }
2206 },
2207 [&](const CharacterSequence&) {
2208 auto value = env.get<NChar>(tuple_entry.id); // get value
2209 Ptr<U8x1> byte_ptr =
2210 (ptr.clone() + static_byte_offset).template to<uint8_t*>(); // compute byte address
2211 setbit<U8x1>(byte_ptr, value.is_null(), static_bit_offset); // update bit
2212 },
2213 [&](const Date&) { store.template operator()<_I32x1>(); },
2214 [&](const DateTime&) { store.template operator()<_I64x1>(); },
2215 [](auto&&) { M_unreachable("invalid type"); },
2216 }, *tuple_entry.type);
2217 } else {
2218 /*----- Load NULL bit. -----*/
2219 U8x1 byte = *(ptr.clone() + static_byte_offset).template to<uint8_t*>(); // load the byte
2220 const uint8_t static_mask = 1U << static_bit_offset;
2221 Var<Boolx1> value((byte bitand static_mask).to<bool>()); // mask bit
2222 new (&null_bits[tuple_idx]) Boolx1(value);
2223 /* Address for NULL bits not yet supported. */
2224 }
2225 } else { // entry must not be NULL
2226#ifndef NDEBUG
2227 if constexpr (IsStore) {
2228 /*----- Check that value is also not NULL. -----*/
2229 auto check = [&]<typename T>() {
2230 Wasm_insist(env.get<T>(tuple_entry.id).not_null(),
2231 "value of non-nullable entry must not be nullable");
2232 };
2233                            visit(overloaded {
2234 [&](const Boolean&) { check.template operator()<_Boolx1>(); },
2235 [&](const Numeric &n) {
2236 switch (n.kind) {
2237 case Numeric::N_Int:
2238 case Numeric::N_Decimal:
2239 switch (n.size()) {
2240 default: M_unreachable("invalid size");
2241 case 8: check.template operator()<_I8x1 >(); break;
2242 case 16: check.template operator()<_I16x1>(); break;
2243 case 32: check.template operator()<_I32x1>(); break;
2244 case 64: check.template operator()<_I64x1>(); break;
2245 }
2246 break;
2247 case Numeric::N_Float:
2248 if (n.size() <= 32)
2249 check.template operator()<_Floatx1>();
2250 else
2251 check.template operator()<_Doublex1>();
2252 }
2253 },
2254 [&](const CharacterSequence&) { check.template operator()<NChar>(); },
2255 [&](const Date&) { check.template operator()<_I32x1>(); },
2256 [&](const DateTime&) { check.template operator()<_I64x1>(); },
2257 [](auto&&) { M_unreachable("invalid type"); },
2258 }, *tuple_entry.type);
2259 }
2260#endif
2261 }
2262 }
2263 ptr.discard(); // since it was always cloned
2264 }
2265 } else { // regular entry
2266 auto &layout_entry = layout_schema[leaf_info.leaf.index()];
2267 M_insist(*layout_entry.type == *leaf_info.leaf.type());
2268 auto tuple_value_it = tuple_value_schema.find(layout_entry.id);
2269 auto tuple_addr_it = tuple_addr_schema.find(layout_entry.id);
2270 if (tuple_value_it == tuple_value_schema.end() and tuple_addr_it == tuple_addr_schema.end())
2271                continue; // entry contained in neither tuple schema
2272 auto tuple_it = tuple_value_it != tuple_value_schema.end() ? tuple_value_it : tuple_addr_it;
2273 M_insist(*tuple_it->type == *layout_entry.type);
2274 const auto tuple_value_idx = std::distance(tuple_value_schema.begin(), tuple_value_it);
2275 const auto tuple_addr_idx = std::distance(tuple_addr_schema.begin(), tuple_addr_it);
2276
2277 if (bit_stride) { // entry with bit stride requires dynamic masking
2278                M_insist(tuple_it->type->is_boolean(), "leaf bit stride currently only supported for `Boolean`");
2279
2280 M_insist(bool(inode_iter), "stride requires repetition");
2281 U64x1 leaf_offset_in_bits = leaf_info.offset_in_bits + *inode_iter * leaf_info.stride_in_bits;
2282                U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
2283 I32x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed().to<int32_t>(); // div 8
2284
2285 Ptr<U8x1> byte_ptr = (inode_ptr + leaf_byte_offset).template to<uint8_t*>();
2286 U8x1 mask = uint8_t(1) << leaf_bit_offset;
2287
2288 if constexpr (IsStore) {
2289 /*----- Store value. -----*/
2290 auto [value, is_null] = env.get<_Boolx1>(tuple_it->id).split(); // get value
2291 is_null.discard(); // handled at NULL bitmap leaf
2292 setbit(byte_ptr, value, mask); // update bit
2293 } else {
2294 /*----- Load value. -----*/
2295 /* TODO: load byte once, create values with respective mask */
2296 if (tuple_value_it != tuple_value_schema.end()) {
2297 Var<Boolx1> value((*byte_ptr.clone() bitand mask.clone()).template to<bool>()); // mask bit with dynamic mask
2298 new (&values[tuple_value_idx]) SQL_t(_Boolx1(value));
2299 }
2300 /* Address for booleans not yet supported. */
2301 byte_ptr.discard();
2302 mask.discard();
2303 }
2304 } else { // entry without bit stride; if masking is required, we can use a static mask
2305 auto ptr = [&]() -> Ptr<void> {
2306 if (inode_iter and leaf_info.stride_in_bits) {
2307 /* omit `leaf_info.offset_in_bits` here to use it as static offset and mask;
2308 * this is valid since no bit stride means that the leaf byte offset computation is
2309 * independent of the static parts */
2310 U64x1 leaf_offset_in_bits = *inode_iter * leaf_info.stride_in_bits;
2311 U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
2312 I32x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed().to<int32_t>(); // div 8
2313 Wasm_insist(leaf_bit_offset == 0U, "no leaf bit offset without bit stride");
2314 return inode_ptr + leaf_byte_offset;
2315 } else {
2316 return inode_ptr;
2317 }
2318 }(); // pointer to entry
2319
2320 const uint8_t static_bit_offset = leaf_info.offset_in_bits % 8;
2321 const int32_t static_byte_offset = leaf_info.offset_in_bits / 8;
2322
2323 /*----- Store value depending on its type. -----*/
2324 auto store = [&]<typename T>() {
2325 using type = typename T::type;
2326 M_insist(static_bit_offset == 0,
2327 "leaf offset of `Numeric`, `Date`, or `DateTime` must be byte aligned");
2328 auto [value, is_null] = env.get<T>(tuple_it->id).split(); // get value
2329 is_null.discard(); // handled at NULL bitmap leaf
2330 *(ptr + static_byte_offset).template to<type*>() = value;
2331 };
2332 /*----- Load value depending on its type. -----*/
2333 auto load = [&]<typename T>() {
2334 using type = typename T::type;
2335 M_insist(static_bit_offset == 0,
2336 "leaf offset of `Numeric`, `Date`, or `DateTime` must be byte aligned");
2337 if (tuple_value_it != tuple_value_schema.end()) {
2338 Var<PrimitiveExpr<type>> value(*(ptr.clone() + static_byte_offset).template to<type*>());
2339 new (&values[tuple_value_idx]) SQL_t(T(value));
2340 }
2341 if (tuple_addr_it != tuple_addr_schema.end())
2342 new (&addrs[tuple_addr_idx]) SQL_addr_t(
2343 (ptr.clone() + static_byte_offset).template to<type*>()
2344 );
2345 ptr.discard();
2346 };
2347 /*----- Select call target (store or load) and visit attribute type. -----*/
2348#define CALL(TYPE) if constexpr (IsStore) store.template operator()<TYPE>(); else load.template operator()<TYPE>()
2349                visit(overloaded {
2350 [&](const Boolean&) {
2351 Ptr<U8x1> byte_ptr = (ptr + static_byte_offset).template to<uint8_t*>();
2352 if constexpr (IsStore) {
2353 /*----- Store value. -----*/
2354 auto [value, is_null] = env.get<_Boolx1>(tuple_it->id).split(); // get value
2355 is_null.discard(); // handled at NULL bitmap leaf
2356 setbit<U8x1>(byte_ptr, value, static_bit_offset); // update bit
2357 } else {
2358 /*----- Load value. -----*/
2359 /* TODO: load byte once, create values with respective mask */
2360 const uint8_t static_mask = 1U << static_bit_offset;
2361
2362 if (tuple_value_it != tuple_value_schema.end()) {
2363 Var<Boolx1> value((*byte_ptr.clone() bitand static_mask).to<bool>()); // mask bit
2364 new (&values[tuple_value_idx]) SQL_t(_Boolx1(value));
2365 }
2366 /* Address for booleans not yet supported. */
2367 byte_ptr.discard();
2368 }
2369 },
2370 [&](const Numeric &n) {
2371 switch (n.kind) {
2372 case Numeric::N_Int:
2373 case Numeric::N_Decimal:
2374 switch (n.size()) {
2375 default: M_unreachable("invalid size");
2376 case 8: CALL(_I8x1 ); break;
2377 case 16: CALL(_I16x1); break;
2378 case 32: CALL(_I32x1); break;
2379 case 64: CALL(_I64x1); break;
2380 }
2381 break;
2382 case Numeric::N_Float:
2383 if (n.size() <= 32)
2384 CALL(_Floatx1);
2385 else
2386 CALL(_Doublex1);
2387 }
2388 },
2389 [&](const CharacterSequence &cs) {
2390 M_insist(static_bit_offset == 0, "leaf offset of `CharacterSequence` must be byte aligned");
2391 Ptr<Charx1> addr = (ptr + static_byte_offset).template to<char*>();
2392 if constexpr (IsStore) {
2393 /*----- Store value. -----*/
2394 auto value = env.get<NChar>(tuple_it->id); // get value
2395 IF (value.clone().not_null()) {
2396 strncpy(addr, value, U32x1(cs.size() / 8)).discard();
2397 };
2398 } else {
2399 /*----- Load value. -----*/
2400 new (&values[tuple_value_idx]) SQL_t(
2401 NChar(addr, layout_entry.nullable(), cs.length, cs.is_varying)
2402 );
2403 /* Omit addresses for character sequences. */
2404 }
2405 },
2406 [&](const Date&) { CALL(_I32x1); },
2407 [&](const DateTime&) { CALL(_I64x1); },
2408 [](auto&&) { M_unreachable("invalid type"); },
2409 }, *tuple_it->type);
2410#undef CALL
2411 }
2412 }
2413 }
2414 });
2415
2416 if constexpr (not IsStore) {
2417        /*----- Combine actual values and possible NULL bits into a new `SQL_t` and add it to the environment. -----*/
2418 for (std::size_t idx = 0; idx != tuple_value_schema.num_entries(); ++idx) {
2419 auto &tuple_entry = tuple_value_schema[idx];
2420 std::visit(overloaded{
2421 [&]<typename T>(Expr<T> value) {
2422 if (has_null_bitmap and layout_schema[tuple_entry.id].second.nullable()) {
2423 Expr<T> combined(value.insist_not_null(), null_bits[idx]);
2424 env.add(tuple_entry.id, combined);
2425 } else {
2426 env.add(tuple_entry.id, value);
2427 }
2428 },
2429 [&](NChar value) {
2430 if (has_null_bitmap and layout_schema[tuple_entry.id].second.nullable()) {
2431 /* introduce variable s.t. uses only load from it */
2432 Var<Ptr<Charx1>> combined(Select(null_bits[idx], Ptr<Charx1>::Nullptr(), value.val()));
2433 env.add(tuple_entry.id, NChar(combined, /* can_be_null=*/ true, value.length(),
2434 value.guarantees_terminating_nul()));
2435 } else {
2436 Var<Ptr<Charx1>> _value(value.val()); // introduce variable s.t. uses only load from it
2437 env.add(tuple_entry.id, NChar(_value, /* can_be_null=*/ false, value.length(),
2438 value.guarantees_terminating_nul()));
2439 }
2440 },
2441 [](auto) { M_unreachable("SIMDfication currently not supported"); },
2442 [](std::monostate) { M_unreachable("value must be loaded beforehand"); },
2443 }, values[idx]);
2444 }
2445
2446 /*----- Add addresses to the environment. -----*/
2447 for (std::size_t idx = 0; idx != tuple_addr_schema.num_entries(); ++idx) {
2448 auto &tuple_entry = tuple_addr_schema[idx];
2449 env.add_addr(tuple_entry.id, std::move(addrs[idx]));
2450 }
2451 }
2452
2453 /*----- Destroy created values and addresses. -----*/
2454 for (std::size_t idx = 0; idx < tuple_value_schema.num_entries(); ++idx)
2455 values[idx].~SQL_t();
2456 for (std::size_t idx = 0; idx < tuple_addr_schema.num_entries(); ++idx)
2457 addrs[idx].~SQL_addr_t();
2458 if constexpr (not IsStore) {
2459 /*----- Destroy created NULL bits. -----*/
2460 for (std::size_t idx = 0; idx != tuple_value_schema.num_entries(); ++idx) {
2461 if (has_null_bitmap and layout_schema[tuple_value_schema[idx].id].second.nullable())
2462 null_bits[idx].~Boolx1();
2463 }
2464 }
2465 base_address.discard(); // discard base address (as it was always cloned)
2466}
2467
2468}
2469
2470}
2471
2472void m::wasm::compile_store_point_access(const Schema &tuple_value_schema, const Schema &tuple_addr_schema,
2473 Ptr<void> base_address, const DataLayout &layout, const Schema &layout_schema,
2474 U32x1 tuple_id)
2475{
2476 return compile_data_layout_point_access<true>(tuple_value_schema, tuple_addr_schema, base_address, layout,
2477 layout_schema, tuple_id);
2478}
2479
2480void m::wasm::compile_load_point_access(const Schema &tuple_value_schema, const Schema &tuple_addr_schema,
2481 Ptr<void> base_address, const DataLayout &layout, const Schema &layout_schema,
2482 U32x1 tuple_id)
2483{
2484 return compile_data_layout_point_access<false>(tuple_value_schema, tuple_addr_schema, base_address, layout,
2485 layout_schema, tuple_id);
2486}
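
/* A minimal usage sketch (assumed caller code; `attr_id` and the 32-bit integer attribute are made up for
 * illustration): load the values of the single tuple with ID 42 into the current environment and read one
 * of them back:
 *
 *     compile_load_point_access(tuple_schema, Schema(), base_address, layout, layout_schema, U32x1(42));
 *     auto value = CodeGenContext::Get().env().get<_I32x1>(attr_id); // assumes a 32-bit integer attribute
 */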
2487
2488
2489/*======================================================================================================================
2490 * Buffer
2491 *====================================================================================================================*/
2492
2493template<bool IsGlobal>
2494Buffer<IsGlobal>::Buffer(const Schema &schema, const DataLayoutFactory &factory, bool load_simdfied,
2495 std::size_t num_tuples, setup_t setup, pipeline_t pipeline, teardown_t teardown)
2496 : schema_(std::cref(schema))
2497 , layout_(factory.make(schema, num_tuples))
2498 , load_simdfied_(load_simdfied)
2499 , setup_(std::move(setup))
2500 , pipeline_(std::move(pipeline))
2501 , teardown_(std::move(teardown))
2502{
2503 M_insist(schema.num_entries() != 0, "buffer schema must not be empty");
2504
2505 if constexpr (IsGlobal) {
2506 if (layout_.is_finite()) {
2507 /*----- Pre-allocate memory for entire buffer. Use maximal possible alignment requirement of 8 bytes. ----*/
2508 const uint32_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2509 const uint32_t num_children =
2510                (layout_.num_tuples() + layout_.child().num_tuples() - 1) / layout_.child().num_tuples();
2511 storage_.base_address_ =
2512 Module::Allocator().pre_allocate(num_children * child_size_in_bytes, /* alignment= */ 8);
2513 } else {
2514 storage_.capacity_.emplace(); // create global for capacity
2515 }
2516 }
2517}
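
/* Worked example for the pre-allocation above: a finite layout whose child instance spans 1025 bits and
 * holds 128 tuples, with 1000 tuples in total, yields child_size_in_bytes = (1025 + 7) / 8 = 129 and
 * num_children = (1000 + 127) / 128 = 8, i.e. a single pre-allocation of 8 * 129 = 1032 bytes. */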
2518
2519template<bool IsGlobal>
2520Buffer<IsGlobal>::~Buffer()
2521{
2522 if constexpr (IsGlobal) { // free memory of global buffer when object is destroyed and no use may occur later
2523 if (not layout_.is_finite()) {
2524 /*----- Deallocate memory for buffer. -----*/
2525 M_insist(bool(storage_.capacity_));
2526 const uint32_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2527 auto buffer_size_in_bytes =
2528 (*storage_.capacity_ / uint32_t(layout_.child().num_tuples())) * child_size_in_bytes;
2529 Module::Allocator().deallocate(storage_.base_address_, buffer_size_in_bytes);
2530 }
2531 }
2532}
2533
2534template<bool IsGlobal>
2535buffer_load_proxy_t<IsGlobal> Buffer<IsGlobal>::create_load_proxy(param_t _tuple_value_schema,
2536 param_t _tuple_addr_schema) const
2537{
2538#ifndef NDEBUG
2539 if (_tuple_value_schema) {
2540 for (auto &e : _tuple_value_schema->get())
2541 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple value schema entry not found");
2542 }
2543 if (_tuple_addr_schema) {
2544 for (auto &e : _tuple_addr_schema->get())
2545 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple address schema entry not found");
2546 }
2547#endif
2548
2549 static Schema empty_schema;
2550 const auto &tuple_value_schema = _tuple_value_schema ? _tuple_value_schema->get() : schema_.get();
2551 const auto &tuple_addr_schema = _tuple_addr_schema ? _tuple_addr_schema->get() : empty_schema;
2552
2553 return buffer_load_proxy_t(*this, tuple_value_schema, tuple_addr_schema);
2554}
2555
2556template<bool IsGlobal>
2557buffer_store_proxy_t<IsGlobal> Buffer<IsGlobal>::create_store_proxy(param_t tuple_schema) const
2558{
2559#ifndef NDEBUG
2560 if (tuple_schema) {
2561 for (auto &e : tuple_schema->get())
2562 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple schema entry not found");
2563 }
2564#endif
2565
2566 return tuple_schema ? buffer_store_proxy_t(*this, *tuple_schema) : buffer_store_proxy_t(*this, schema_);
2567}
2568
2569template<bool IsGlobal>
2570buffer_swap_proxy_t<IsGlobal> Buffer<IsGlobal>::create_swap_proxy(param_t tuple_schema) const
2571{
2572#ifndef NDEBUG
2573 if (tuple_schema) {
2574 for (auto &e : tuple_schema->get())
2575 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple schema entry not found");
2576 }
2577#endif
2578
2579 return tuple_schema ? buffer_swap_proxy_t(*this, *tuple_schema) : buffer_swap_proxy_t(*this, schema_);
2580}
2581
2582template<bool IsGlobal>
2583void Buffer<IsGlobal>::setup()
2584{
2585 M_insist(not base_address_, "must not call `setup()` twice");
2586 M_insist(not size_, "must not call `setup()` twice");
2587 M_insist(not capacity_, "must not call `setup()` twice");
2588 M_insist(not first_iteration_, "must not call `setup()` twice");
2589
2590 /*----- Create local variables. -----*/
2591 base_address_.emplace();
2592 size_.emplace();
2593 if (not layout_.is_finite()) {
2594 capacity_.emplace();
2595 first_iteration_.emplace(true); // set to true
2596 }
2597
2598 /*----- For global buffers, read values from global backups into local variables. -----*/
2599 if constexpr (IsGlobal) {
2600 /* omit assigning base address here as it will always be set below */
2601 *size_ = storage_.size_;
2602 if (not layout_.is_finite()) {
2603 M_insist(bool(storage_.capacity_));
2604 *capacity_ = *storage_.capacity_;
2605 }
2606 }
2607
2608 if (layout_.is_finite()) {
2609 if constexpr (IsGlobal) {
2610 *base_address_ = storage_.base_address_; // buffer always already pre-allocated
2611 } else {
2612 /*----- Pre-allocate memory for entire buffer. Use maximal possible alignment requirement of 8 bytes. ----*/
2613 const uint32_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2614 const uint32_t num_children =
2615 (layout_.num_tuples() + layout_.child().num_tuples() - 1) / layout_.child().num_tuples();
2616 *base_address_ = Module::Allocator().pre_allocate(num_children * child_size_in_bytes, /* alignment= */ 8);
2617 }
2618 } else {
2619 if constexpr (IsGlobal) {
2620 IF (*capacity_ == 0U) { // buffer not yet allocated
2621 /*----- Set initial capacity. -----*/
2622 *capacity_ = uint32_t(layout_.child().num_tuples());
2623
2624 /*----- Allocate memory for one child instance. Use max. possible alignment requirement of 8 bytes. --*/
2625 const uint32_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2626 *base_address_ = Module::Allocator().allocate(child_size_in_bytes, /* alignment= */ 8);
2627 } ELSE {
2628 *base_address_ = storage_.base_address_;
2629 };
2630 } else {
2631 /*----- Set initial capacity. -----*/
2632 *capacity_ = uint32_t(layout_.child().num_tuples());
2633
2634 /*----- Allocate memory for one child instance. Use max. possible alignment requirement of 8 bytes. -----*/
2635 const uint32_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2636 *base_address_ = Module::Allocator().allocate(child_size_in_bytes, /* alignment= */ 8);
2637 }
2638 }
2639}
2640
2641template<bool IsGlobal>
2642void Buffer<IsGlobal>::teardown()
2643{
2644 M_insist(bool(base_address_), "must call `setup()` before");
2645 M_insist(bool(size_), "must call `setup()` before");
2646 M_insist(not layout_.is_finite() == bool(capacity_), "must call `setup()` before");
2647 M_insist(not layout_.is_finite() == bool(first_iteration_), "must call `setup()` before");
2648
2649 if constexpr (not IsGlobal) { // free memory of local buffer when user calls teardown method
2650 if (not layout_.is_finite()) {
2651 /*----- Deallocate memory for buffer. -----*/
2652 const uint32_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2653 auto buffer_size_in_bytes = (*capacity_ / uint32_t(layout_.child().num_tuples())) * child_size_in_bytes;
2654 Module::Allocator().deallocate(*base_address_, buffer_size_in_bytes);
2655 }
2656 }
2657
2658 /*----- For global buffers, write values from local variables into global backups. -----*/
2659 if constexpr (IsGlobal) {
2660 storage_.base_address_ = *base_address_;
2661 storage_.size_ = *size_;
2662 if (not layout_.is_finite()) {
2663 M_insist(bool(storage_.capacity_));
2664 *storage_.capacity_ = *capacity_;
2665 }
2666 }
2667
2668 /*----- Destroy local variables. -----*/
2669 base_address_.reset();
2670 size_.reset();
2671 if (not layout_.is_finite()) {
2672 capacity_.reset();
2673 first_iteration_->val().discard(); // artificial use to silence diagnostics if `consume()` is not called
2674 first_iteration_.reset();
2675 }
2676}
2677
2678template<bool IsGlobal>
2679void Buffer<IsGlobal>::resume_pipeline(param_t _tuple_value_schema, param_t _tuple_addr_schema) const
2680{
2681 if (not pipeline_)
2682 return;
2683
2684 static Schema empty_schema;
2685 const auto &tuple_value_schema = _tuple_value_schema ? _tuple_value_schema->get() : schema_.get();
2686 const auto &tuple_addr_schema = _tuple_addr_schema ? _tuple_addr_schema->get() : empty_schema;
2687
2688#ifndef NDEBUG
2689 for (auto &e : tuple_value_schema)
2690 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple value schema entry not found");
2691 for (auto &e : tuple_addr_schema)
2692 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple addr schema entry not found");
2693#endif
2694
2695 /*----- Create function on-demand to assert that all needed identifiers are already created. -----*/
2696 if (not resume_pipeline_) {
2697 /*----- Create function to resume the pipeline for each tuple contained in the buffer. -----*/
2698 FUNCTION(resume_pipeline, void(void*, uint32_t))
2699 {
2700 auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function
2701
2702 /*----- Access base address and size parameters. -----*/
2703 Ptr<void> base_address = PARAMETER(0);
2704 U32x1 size = PARAMETER(1);
2705
2706            /*----- Compute possible number of SIMD lanes and decide which to use w.r.t. other operators' preferences. */
2707 const auto num_simd_lanes_preferred =
2708                CodeGenContext::Get().num_simd_lanes_preferred(); // get other operators' preferences
2709 const std::size_t num_simd_lanes =
2710 load_simdfied_ ? std::max<std::size_t>({ num_simd_lanes_preferred,
2711 get_num_simd_lanes(layout_, schema_, tuple_value_schema),
2712 tuple_addr_schema.empty() ? 0UL : 4UL }) // 32-bit pointers and 128-bit SIMD vectors
2713 : 1;
2714            CodeGenContext::Get().set_num_simd_lanes(num_simd_lanes);
2715
2716 /*----- Emit setup code *before* compiling data layout to not overwrite its temporary boolean variables. -*/
2717 setup_();
2718
2719 Var<U32x1> load_tuple_id; // default initialized to 0
2720
2721 if (tuple_value_schema.num_entries() == 0 and tuple_addr_schema.num_entries() == 0) {
2722                /*----- If no attributes must be loaded, generate a loop that just executes the pipeline `size` times. -----*/
2723 WHILE (load_tuple_id < size) {
2724 load_tuple_id += uint32_t(num_simd_lanes);
2725 pipeline_();
2726 }
2727 base_address.discard(); // since it is not needed
2728 } else {
2729 /*----- Compile data layout to generate sequential load from buffer. -----*/
2730 auto [load_inits, loads, load_jumps] =
2731 compile_load_sequential(tuple_value_schema, tuple_addr_schema, base_address, layout_,
2732 num_simd_lanes, schema_, load_tuple_id);
2733
2734 /*----- Generate loop for loading entire buffer, with the pipeline emitted into the loop body. -----*/
2735 load_inits.attach_to_current();
2736 WHILE (load_tuple_id < size) {
2737 loads.attach_to_current();
2738 pipeline_();
2739 load_jumps.attach_to_current();
2740 }
2741 }
2742
2743 /*----- Emit teardown code. -----*/
2744 teardown_();
2745 }
2746 resume_pipeline_ = std::move(resume_pipeline);
2747 }
2748
2749 /*----- Call created function. -----*/
2750 M_insist(bool(resume_pipeline_));
2751 (*resume_pipeline_)(base_address(), size()); // base address and size as arguments
2752}
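
/* Note: the compiled function is memoized in `resume_pipeline_`, so subsequent calls to `resume_pipeline()`
 * on this buffer do not generate code again but merely re-invoke the cached Wasm function with the current
 * base address and size. */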
2753
2754template<bool IsGlobal>
2755void Buffer<IsGlobal>::resume_pipeline_inline(param_t tuple_value_schema, param_t tuple_addr_schema) const
2756{
2757 execute_pipeline_inline(setup_, pipeline_, teardown_, std::move(tuple_value_schema), std::move(tuple_addr_schema));
2758}
2759
2760template<bool IsGlobal>
2761void Buffer<IsGlobal>::execute_pipeline(setup_t setup, pipeline_t pipeline, teardown_t teardown,
2762 param_t _tuple_value_schema, param_t _tuple_addr_schema) const
2763{
2764 if (not pipeline)
2765 return;
2766
2767 static Schema empty_schema;
2768 const auto &tuple_value_schema = _tuple_value_schema ? _tuple_value_schema->get() : schema_.get();
2769 const auto &tuple_addr_schema = _tuple_addr_schema ? _tuple_addr_schema->get() : empty_schema;
2770
2771#ifndef NDEBUG
2772 for (auto &e : tuple_value_schema)
2773 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple value schema entry not found");
2774 for (auto &e : tuple_addr_schema)
2775 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple addr schema entry not found");
2776#endif
2777
2778 /*----- Create function to resume the pipeline for each tuple contained in the buffer. -----*/
2779 FUNCTION(resume_pipeline, void(void*, uint32_t))
2780 {
2781 auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function
2782
2783 /*----- Access base address and size parameters. -----*/
2784 Ptr<void> base_address = PARAMETER(0);
2785 U32x1 size = PARAMETER(1);
2786
2787        /*----- Compute possible number of SIMD lanes and decide which to use w.r.t. other operators' preferences. */
2788 const auto num_simd_lanes_preferred =
2789            CodeGenContext::Get().num_simd_lanes_preferred(); // get other operators' preferences
2790 const std::size_t num_simd_lanes =
2791 load_simdfied_ ? std::max<std::size_t>({ num_simd_lanes_preferred,
2792 get_num_simd_lanes(layout_, schema_, tuple_value_schema),
2793 tuple_addr_schema.empty() ? 0UL : 4UL }) // 32-bit pointers and 128-bit SIMD vectors
2794 : 1;
2795        CodeGenContext::Get().set_num_simd_lanes(num_simd_lanes);
2796
2797 /*----- Emit setup code *before* compiling data layout to not overwrite its temporary boolean variables. -*/
2798 setup();
2799
2800 Var<U32x1> load_tuple_id; // default initialized to 0
2801
2802 if (tuple_value_schema.num_entries() == 0 and tuple_addr_schema.num_entries() == 0) {
2803            /*----- If no attributes must be loaded, generate a loop that just executes the pipeline `size` times. -----*/
2804 WHILE (load_tuple_id < size) {
2805 load_tuple_id += uint32_t(num_simd_lanes);
2806 pipeline();
2807 }
2808 base_address.discard(); // since it is not needed
2809 } else {
2810 /*----- Compile data layout to generate sequential load from buffer. -----*/
2811 auto [load_inits, loads, load_jumps] =
2812 compile_load_sequential(tuple_value_schema, tuple_addr_schema, base_address, layout_, num_simd_lanes,
2813 schema_, load_tuple_id);
2814
2815 /*----- Generate loop for loading entire buffer, with the pipeline emitted into the loop body. -----*/
2816 load_inits.attach_to_current();
2817 WHILE (load_tuple_id < size) {
2818 loads.attach_to_current();
2819 pipeline();
2820 load_jumps.attach_to_current();
2821 }
2822 }
2823
2824 /*----- Emit teardown code. -----*/
2825 teardown();
2826 }
2827
2828 /*----- Call created function. -----*/
2829 resume_pipeline(base_address(), size()); // base address and size as arguments
2830}
2831
2832template<bool IsGlobal>
2833void Buffer<IsGlobal>::execute_pipeline_inline(setup_t setup, pipeline_t pipeline, teardown_t teardown,
2834 param_t _tuple_value_schema, param_t _tuple_addr_schema) const
2835{
2836 if (not pipeline)
2837 return;
2838
2839 static Schema empty_schema;
2840 const auto &tuple_value_schema = _tuple_value_schema ? _tuple_value_schema->get() : schema_.get();
2841 const auto &tuple_addr_schema = _tuple_addr_schema ? _tuple_addr_schema->get() : empty_schema;
2842
2843#ifndef NDEBUG
2844 for (auto &e : tuple_value_schema)
2845 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple value schema entry not found");
2846 for (auto &e : tuple_addr_schema)
2847 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple addr schema entry not found");
2848#endif
2849
2850 /*----- Access base address and size depending on whether they are globals or locals. -----*/
2851 Ptr<void> base_address =
2852 M_CONSTEXPR_COND(IsGlobal,
2853 base_address_ ? base_address_->val() : Var<Ptr<void>>(storage_.base_address_.val()).val(),
2854 ({ M_insist(bool(base_address_)); base_address_->val(); }));
2855 U32x1 size =
2856 M_CONSTEXPR_COND(IsGlobal,
2857 size_ ? size_->val() : Var<U32x1>(storage_.size_.val()).val(),
2858 ({ M_insist(bool(size_)); size_->val(); }));
2859
2860 /*----- If predication is used, compute number of tuples to load from buffer depending on predicate. -----*/
2861 std::optional<Var<Boolx1>> pred; // use variable since WHILE loop will clone it (for IF and DO_WHILE)
2862 if (auto &env = CodeGenContext::Get().env(); env.predicated()) {
2863 M_insist(CodeGenContext::Get().num_simd_lanes() == 1, "invalid number of SIMD lanes");
2864 pred = env.extract_predicate<_Boolx1>().is_true_and_not_null();
2865 }
2866 U32x1 num_tuples = pred ? Select(*pred, size, 0U) : size;
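    /* If the predicate evaluates to false, `num_tuples` is 0, so the loading loops below terminate
     * immediately and the pipeline is effectively skipped for the entire buffer. */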
2867
2868    /*----- Compute possible number of SIMD lanes and decide which to use with regard to other operators' preferences. */
2869    const auto num_simd_lanes_preferred =
2870        CodeGenContext::Get().num_simd_lanes_preferred(); // get other operators' preferences
2871 const std::size_t num_simd_lanes =
2872 load_simdfied_ ? std::max<std::size_t>({ num_simd_lanes_preferred,
2873 get_num_simd_lanes(layout_, schema_, tuple_value_schema),
2874 tuple_addr_schema.empty() ? 0UL : 4UL }) // 32-bit pointers and 128-bit SIMD vectors
2875 : 1;
2876    CodeGenContext::Get().set_num_simd_lanes(num_simd_lanes);
2877
2878 /*----- Emit setup code *before* compiling data layout to not overwrite its temporary boolean variables. -----*/
2879 setup();
2880
2881 Var<U32x1> load_tuple_id(0); // explicitly (re-)set tuple ID to 0
2882
2883 if (tuple_value_schema.num_entries() == 0 and tuple_addr_schema.num_entries() == 0) {
2884        /*----- If no attributes must be loaded, generate a loop just executing the pipeline `num_tuples` times. -----*/
2885 WHILE (load_tuple_id < num_tuples) {
2886 load_tuple_id += uint32_t(num_simd_lanes);
2887 pipeline();
2888 }
2889 base_address.discard(); // since it is not needed
2890 } else {
2891 /*----- Compile data layout to generate sequential load from buffer. -----*/
2892 auto [load_inits, loads, load_jumps] =
2893 compile_load_sequential(tuple_value_schema, tuple_addr_schema, base_address, layout_,
2894 num_simd_lanes, schema_, load_tuple_id);
2895
2896 /*----- Generate loop for loading entire buffer, with the pipeline emitted into the loop body. -----*/
2897 load_inits.attach_to_current();
2898 WHILE (load_tuple_id < num_tuples) {
2899 loads.attach_to_current();
2900 pipeline();
2901 load_jumps.attach_to_current();
2902 }
2903 }
2904
2905 /*----- Emit teardown code. -----*/
2906 teardown();
2907}
2908
2909template<bool IsGlobal>
2910void Buffer<IsGlobal>::consume()
2911{
2912 M_insist(bool(base_address_), "must call `setup()` before");
2913 M_insist(bool(size_), "must call `setup()` before");
2914 M_insist(not layout_.is_finite() == bool(capacity_), "must call `setup()` before");
2915 M_insist(not layout_.is_finite() == bool(first_iteration_), "must call `setup()` before");
2916
2917 /*----- Compile data layout to generate sequential single-pass store into the buffer. -----*/
2918 /* We are able to use a single-pass store, i.e. *local* pointers and masks, since we explicitly save the needed
2919 * variables, i.e. base address and size, using *global* backups and restore them before performing the actual
2920 * store in the case of global buffers. For local buffers, stores must be done in a single pass anyway. */
2921 static Schema empty_schema;
2922 auto [_store_inits, stores, _store_jumps] =
2923 compile_store_sequential_single_pass(schema_, empty_schema, *base_address_, layout_,
2924 CodeGenContext::Get().num_simd_lanes(), schema_, *size_);
2925 Block store_inits(std::move(_store_inits)), store_jumps(std::move(_store_jumps));
2926
2927 if (layout_.is_finite()) {
2928 IF (*size_ == 0U) { // buffer empty
2929 /*----- Emit initialization code for storing (i.e. (re-)set to first buffer slot). -----*/
2930 store_inits.attach_to_current();
2931 };
2932 } else {
2933 IF (*size_ == *capacity_) { // buffer full
2934 /*----- Resize buffer by doubling its capacity. -----*/
2935 const uint32_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2936 auto buffer_size_in_bytes = (*capacity_ / uint32_t(layout_.child().num_tuples())) * child_size_in_bytes;
2937 auto ptr = Module::Allocator().allocate(buffer_size_in_bytes.clone());
2938 Wasm_insist(ptr == *base_address_ + buffer_size_in_bytes.make_signed(),
2939 "buffer could not be resized sequentially in memory");
2940 *capacity_ *= 2U;
2941 };
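        /* Example (illustrative numbers): for a stride of 128 bits, `child_size_in_bytes` is 16 bytes.
         * With `*capacity_` = 1024 tuples and 64 tuples per child, `buffer_size_in_bytes` is
         * (1024 / 64) * 16 = 256 bytes, i.e. allocating another 256 bytes directly after the buffer
         * doubles its capacity. */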
2942
2943 IF (*first_iteration_) {
2944 /*----- Emit initialization code for storing (i.e. set to current buffer slot). -----*/
2945 store_inits.attach_to_current();
2946
2947 *first_iteration_ = false;
2948 };
2949 }
2950
2951 /*----- Emit storing code. -----*/
2952 stores.attach_to_current();
2953
2954 if (layout_.is_finite()) {
2955 IF (*size_ == uint32_t(layout_.num_tuples() - CodeGenContext::Get().num_simd_lanes())) { // buffer full
2956 /*----- Resume pipeline for each tuple in buffer and reset size of buffer to 0. -----*/
2957 *size_ = uint32_t(layout_.num_tuples()); // increment size of buffer to resume pipeline even for last tuple
2958 resume_pipeline();
2959 *size_ = 0U;
2960 } ELSE { // buffer not full
2961 /*----- Emit advancing code to next buffer slot and increment size of buffer. -----*/
2962 store_jumps.attach_to_current();
2963 };
2964 } else {
2965 /*----- Emit advancing code to next buffer slot and increment size of buffer. -----*/
2966 store_jumps.attach_to_current();
2967 }
2968}
2969
2970// explicit instantiations to prevent linker errors
2971template struct m::wasm::Buffer<false>;
2972template struct m::wasm::Buffer<true>;
2973
2974
2975/*======================================================================================================================
2976 * buffer accesses
2977 *====================================================================================================================*/
2978
2979template<bool IsGlobal>
2980void buffer_swap_proxy_t<IsGlobal>::operator()(U32x1 first, U32x1 second)
2981{
2982 /*----- Create load proxy. -----*/
2983 auto load = buffer_.get().create_load_proxy(schema_.get());
2984
2985 /*----- Load first tuple into fresh environment. -----*/
2986 auto env_first = [&](){
2987        auto S = CodeGenContext::Get().scoped_environment();
2988        load(first.clone());
2989 return S.extract();
2990 }();
2991
2992 operator()(first, second, env_first);
2993}
2994
2995template<bool IsGlobal>
2996void buffer_swap_proxy_t<IsGlobal>::operator()(U32x1 first, U32x1 second, const Environment &env_first)
2997{
2998 /*----- Create load and store proxies. -----*/
2999 auto load = buffer_.get().create_load_proxy(schema_.get());
3000 auto store = buffer_.get().create_store_proxy(schema_.get());
3001
3002 /*----- Temporarily save first tuple by creating variable or separate string buffer. -----*/
3003 Environment _env_first;
3004 for (auto &e : schema_.get()) {
3005 std::visit(overloaded {
3006 [&](NChar value) -> void {
3007 Var<Ptr<Charx1>> ptr; // always set here
3008 IF (value.clone().is_null()) {
3009 ptr = Ptr<Charx1>::Nullptr();
3010 } ELSE {
3011 ptr = Module::Allocator().pre_malloc<char>(value.size_in_bytes());
3012 strncpy(ptr, value, U32x1(value.size_in_bytes())).discard();
3013 };
3014 _env_first.add(e.id, NChar(ptr, value.can_be_null(), value.length(), value.guarantees_terminating_nul()));
3015 },
3016 [&]<typename T>(Expr<T> value) -> void {
3017 if (value.can_be_null()) {
3018 Var<Expr<T>> var(value);
3019 _env_first.add(e.id, var);
3020 } else {
3021 Var<PrimitiveExpr<T>> var(value.insist_not_null());
3022 _env_first.add(e.id, Expr<T>(var));
3023 }
3024 },
3025 [](auto) -> void { M_unreachable("SIMDfication currently not supported"); },
3026 [](std::monostate) -> void { M_unreachable("value must be loaded beforehand"); },
3027 }, env_first.get(e.id));
3028 }
3029
3030    /*----- Load second tuple in a scoped environment and store it directly at the first tuple's address. -----*/
3031 {
3032        auto S = CodeGenContext::Get().scoped_environment();
3033        load(second.clone());
3034 store(first);
3035 }
3036
3037    /*----- Store temporarily saved first tuple at the second tuple's address. -----*/
3038 {
3039 auto S = CodeGenContext::Get().scoped_environment(std::move(_env_first));
3040 store(second);
3041 }
3042}
3043
3044template<bool IsGlobal>
3045void buffer_swap_proxy_t<IsGlobal>::operator()(U32x1 first, U32x1 second, const Environment &env_first,
3046 const Environment &env_second)
3047{
3048 /*----- Create store proxy. -----*/
3049 auto store = buffer_.get().create_store_proxy(schema_.get());
3050
3051 /*----- Temporarily save first tuple by creating variable or separate string buffer. -----*/
3052 Environment _env_first;
3053 for (auto &e : schema_.get()) {
3054 std::visit(overloaded {
3055 [&](NChar value) -> void {
3056 Var<Ptr<Charx1>> ptr; // always set here
3057 IF (value.clone().is_null()) {
3058 ptr = Ptr<Charx1>::Nullptr();
3059 } ELSE {
3060 ptr = Module::Allocator().pre_malloc<char>(value.size_in_bytes());
3061 strncpy(ptr, value, U32x1(value.size_in_bytes())).discard();
3062 };
3063 _env_first.add(e.id, NChar(ptr, value.can_be_null(), value.length(), value.guarantees_terminating_nul()));
3064 },
3065 [&]<typename T>(Expr<T> value) -> void {
3066 if (value.can_be_null()) {
3067 Var<Expr<T>> var(value);
3068 _env_first.add(e.id, var);
3069 } else {
3070 Var<PrimitiveExpr<T>> var(value.insist_not_null());
3071 _env_first.add(e.id, Expr<T>(var));
3072 }
3073 },
3074 [](auto) -> void { M_unreachable("SIMDfication currently not supported"); },
3075 [](std::monostate) -> void { M_unreachable("value must be loaded beforehand"); }
3076 }, env_first.get(e.id));
3077 }
3078
3079    /*----- Store the already loaded second tuple directly at the first tuple's address. -----*/
3080 {
3081        auto S = CodeGenContext::Get().scoped_environment();
3082        CodeGenContext::Get().env().add(env_second);
3083 store(first);
3084 }
3085
3086    /*----- Store temporarily saved first tuple at the second tuple's address. -----*/
3087 {
3088 auto S = CodeGenContext::Get().scoped_environment(std::move(_env_first));
3089 store(second);
3090 }
3091}
3092
3093// explicit instantiations to prevent linker errors
3094template struct m::wasm::buffer_swap_proxy_t<false>;
3095template struct m::wasm::buffer_swap_proxy_t<true>;
3096
3097
3098/*======================================================================================================================
3099 * string comparison
3100 *====================================================================================================================*/
3101
3102_I32x1 m::wasm::strncmp(NChar _left, NChar _right, U32x1 len, bool reverse)
3103{
3104 static thread_local struct {} _; // unique caller handle
3105 struct data_t : GarbageCollectedData
3106 {
3107 public:
3108 using fn_t = int32_t(uint32_t, uint32_t, char*, char*, uint32_t);
3109 std::optional<FunctionProxy<fn_t>> strncmp_terminating_nul;
3110 std::optional<FunctionProxy<fn_t>> strncmp_no_terminating_nul;
3111
3112 data_t(GarbageCollectedData &&d) : GarbageCollectedData(std::move(d)) { }
3113 };
3114 auto &d = Module::Get().add_garbage_collected_data<data_t>(&_); // garbage collect the `data_t` instance
3115
3116 auto strncmp_non_null = [&d, &_left, &_right, &reverse](Ptr<Charx1> left, Ptr<Charx1> right, U32x1 len) -> I32x1 {
3117 Wasm_insist(left.clone().not_null(), "left operand must not be NULL");
3118 Wasm_insist(right.clone().not_null(), "right operand must not be NULL");
3119 Wasm_insist(len.clone() != 0U, "length to compare must not be 0");
3120
3121 if (_left.length() == 1 and _right.length() == 1) {
3122 /*----- Special handling of single char strings. -----*/
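            /* The idiom (a > b) - (a < b) yields +1, 0, or -1, e.g. comparing 'b' to 'a' gives 1 - 0 = 1. */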
3123 len.discard();
3124 auto left_gt_right = *left.clone() > *right.clone();
3125 return left_gt_right.to<int32_t>() - (*left < *right).to<int32_t>();
3126 } else {
3127 if (_left.guarantees_terminating_nul() and _right.guarantees_terminating_nul() and not reverse) { // reverse needs in-bounds checks
3128 if (not d.strncmp_terminating_nul) {
3129 /*----- Create function to compute the result for non-nullptr arguments character-wise. -----*/
3130 FUNCTION(strncmp_terminating_nul, data_t::fn_t)
3131 {
3132 auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function
3133
3134 const auto len_ty_left = PARAMETER(0);
3135 const auto len_ty_right = PARAMETER(1);
3136 auto left = PARAMETER(2);
3137 auto right = PARAMETER(3);
3138 const auto len = PARAMETER(4);
3139
3140 Var<I32x1> result; // always set here
3141
3142 I32x1 len_left = Select(len < len_ty_left, len, len_ty_left) .make_signed();
3143 I32x1 len_right = Select(len < len_ty_right, len, len_ty_right).make_signed();
3144 Var<Ptr<Charx1>> end_left (left + len_left);
3145 Var<Ptr<Charx1>> end_right(right + len_right);
3146
3147 LOOP() {
3148 /* Check whether one side is shorter than the other. */
3149 result = (left != end_left).to<int32_t>() - (right != end_right).to<int32_t>();
3150 BREAK(result != 0 or left == end_left); // at the end of either or both strings
3151
3152 /* Compare by current character. Loading is valid since we have not seen the terminating
3153 * NUL byte yet. */
3154 result = (*left > *right).to<int32_t>() - (*left < *right).to<int32_t>();
3155 BREAK(result != 0); // found first position where strings differ
3156 BREAK(*left == 0); // reached end of identical strings
3157
3158 /* Advance to next character. */
3159 left += 1;
3160 right += 1;
3161 CONTINUE();
3162 }
3163
3164 RETURN(result);
3165 }
3166 d.strncmp_terminating_nul = std::move(strncmp_terminating_nul);
3167 }
3168
3169 /*----- Call strncmp_terminating_nul function. ------*/
3170 M_insist(bool(d.strncmp_terminating_nul));
3171 return (*d.strncmp_terminating_nul)(_left.length(), _right.length(), left, right, len);
3172 } else {
3173 if (not d.strncmp_no_terminating_nul) {
3174 /*----- Create function to compute the result for non-nullptr arguments character-wise. -----*/
3175 FUNCTION(strncmp_no_terminating_nul, data_t::fn_t)
3176 {
3177 auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function
3178
3179 const auto len_ty_left = PARAMETER(0);
3180 const auto len_ty_right = PARAMETER(1);
3181 Var<Ptr<Charx1>> left(PARAMETER(2));
3182 Var<Ptr<Charx1>> right(PARAMETER(3));
3183 const auto len = PARAMETER(4);
3184
3185 Var<I32x1> result; // always set here
3186
3187 I32x1 len_left = Select(len < len_ty_left, len, len_ty_left) .make_signed();
3188 I32x1 len_right = Select(len < len_ty_right, len, len_ty_right).make_signed();
3189 Var<Ptr<Charx1>> end_left, end_right;
3190
3191 if (not reverse) {
3192 /* Set end variables according to theoretical length. */
3193 end_left = left + len_left;
3194 end_right = right + len_right;
3195 } else {
3196 /* Set end variables to first found NUL byte without exceeding the theoretical length. */
3197 end_left = left;
3198 WHILE(*end_left != 0 and end_left != left + len_left) {
3199 end_left += 1;
3200 }
3201 end_right = right;
3202 WHILE(*end_right != 0 and end_right != right + len_right) {
3203 end_right += 1;
3204 }
3205
3206 /* Swap variable for current position with the one for end position to iterate reversed. */
3207 swap(left, end_left);
3208 swap(right, end_right);
3209
3210 /* Resolve off-by-one errors created by swapping variables. */
3211 left -= 1;
3212 right -= 1;
3213 end_left -= 1;
3214 end_right -= 1;
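                        /* E.g. for left = "ab" of length 2 without terminating NUL byte: the search sets
                         * end_left = left + 2; after swapping and decrementing, iteration starts at the
                         * last character 'b' and stops one position before the first character. */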
3215 }
3216
3217 LOOP() {
3218 /* Check whether one side is shorter than the other. Load next character with in-bounds
3219 * checks since the strings may not be NUL byte terminated. */
3220 Var<Charx1> val_left, val_right;
3221 IF (left != end_left) {
3222 val_left = *left;
3223 } ELSE {
3224 val_left = '\0';
3225 };
3226 IF (right != end_right) {
3227 val_right = *right;
3228 } ELSE {
3229 val_right = '\0';
3230 };
3231
3232 /* Compare by current character. */
3233 result = (val_left > val_right).to<int32_t>() - (val_left < val_right).to<int32_t>();
3234 BREAK(result != 0); // found first position where strings differ
3235 BREAK(val_left == 0); // reached end of identical strings
3236
3237 /* Advance to next character. */
3238 left += reverse ? -1 : 1;
3239 right += reverse ? -1 : 1;
3240 CONTINUE();
3241 }
3242
3243 RETURN(result);
3244 }
3245 d.strncmp_no_terminating_nul = std::move(strncmp_no_terminating_nul);
3246 }
3247
3248 /*----- Call strncmp_no_terminating_nul function. ------*/
3249 M_insist(bool(d.strncmp_no_terminating_nul));
3250 return (*d.strncmp_no_terminating_nul)(_left.length(), _right.length(), left, right, len);
3251 }
3252 }
3253 };
3254
3255 const Var<Ptr<Charx1>> left(_left.val()), right(_right.val());
3256 if (_left.can_be_null() or _right.can_be_null()) {
3257 _Var<I32x1> result; // always set here
3258 IF (left.is_null() or right.is_null()) {
3259 result = _I32x1::Null();
3260 } ELSE {
3261 result = strncmp_non_null(left, right, len);
3262 };
3263 return result;
3264 } else {
3265 const Var<I32x1> result(strncmp_non_null(left, right, len)); // to prevent duplicated computation due to `clone()`
3266 return _I32x1(result);
3267 }
3268}
3269
3270_I32x1 m::wasm::strcmp(NChar left, NChar right, bool reverse)
3271{
3272 /* Delegate to `strncmp` with length set to minimum of both string lengths **plus** 1 since we need to check if
3273 * one string is a prefix of the other, i.e. all of its characters are equal but it is shorter than the other. */
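    /* E.g. for left = "abc" (length 3) and right = "abcd" (length 4), len = 4: with len = 3 both strings
     * would compare equal over "abc", whereas the additional position reveals that left is already
     * exhausted while right still holds 'd', yielding "abc" < "abcd". */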
3274 U32x1 len(std::min<uint32_t>(left.length(), right.length()) + 1U);
3275 return strncmp(left, right, len, reverse);
3276}
3277
3278_Boolx1 m::wasm::strncmp(NChar left, NChar right, U32x1 len, cmp_op op, bool reverse)
3279{
3280 _I32x1 res = strncmp(left, right, len, reverse);
3281
3282 switch (op) {
3283 case EQ: return res == 0;
3284 case NE: return res != 0;
3285 case LT: return res < 0;
3286 case LE: return res <= 0;
3287 case GT: return res > 0;
3288 case GE: return res >= 0;
3289 }
3290}
3291
3292_Boolx1 m::wasm::strcmp(NChar left, NChar right, cmp_op op, bool reverse)
3293{
3294 _I32x1 res = strcmp(left, right, reverse);
3295
3296 switch (op) {
3297 case EQ: return res == 0;
3298 case NE: return res != 0;
3299 case LT: return res < 0;
3300 case LE: return res <= 0;
3301 case GT: return res > 0;
3302 case GE: return res >= 0;
3303 }
3304}
3305
3306
3307/*======================================================================================================================
3308 * string copy
3309 *====================================================================================================================*/
3310
3311Ptr<Charx1> m::wasm::strncpy(Ptr<Charx1> dst, Ptr<Charx1> src, U32x1 count)
3312{
3313 static thread_local struct {} _; // unique caller handle
3314 struct data_t : GarbageCollectedData
3315 {
3316 public:
3317 std::optional<FunctionProxy<char*(char*, char*, uint32_t)>> strncpy;
3318
3319 data_t(GarbageCollectedData &&d) : GarbageCollectedData(std::move(d)) { }
3320 };
3321 auto &d = Module::Get().add_garbage_collected_data<data_t>(&_); // garbage collect the `data_t` instance
3322
3323 if (not d.strncpy) {
3324 /*----- Create function to compute the result. -----*/
3325 FUNCTION(strncpy, char*(char*, char*, uint32_t))
3326 {
3327 auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function
3328
3329 auto dst = PARAMETER(0);
3330 auto src = PARAMETER(1);
3331 const auto count = PARAMETER(2);
3332
3333 Wasm_insist(not src.is_nullptr(), "source must not be nullptr");
3334 Wasm_insist(not dst.is_nullptr(), "destination must not be nullptr");
3335
3336 Var<Ptr<Charx1>> src_end(src + count.make_signed());
3337 WHILE (src != src_end) {
3338 *dst = *src;
3339 BREAK(*src == '\0'); // break on terminating NUL byte
3340 src += 1;
3341 dst += 1;
3342 }
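        /* `dst` now points at the copied terminating NUL byte if one was found within `count` characters,
         * and one past the last copied character otherwise. */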
3343
3344 RETURN(dst);
3345 }
3346 d.strncpy = std::move(strncpy);
3347 }
3348
3349 /*----- Call strncpy function. ------*/
3350 M_insist(bool(d.strncpy));
3351 const Var<Ptr<Charx1>> result((*d.strncpy)(dst, src, count)); // to prevent duplicated computation due to `clone()`
3352 return result;
3353}
3354
3355
3356/*======================================================================================================================
3357 * WasmLike
3358 *====================================================================================================================*/
3359
3360_Boolx1 m::wasm::like(NChar _str, NChar _pattern, const char escape_char)
3361{
3362 static thread_local struct {} _; // unique caller handle
3363 struct data_t : GarbageCollectedData
3364 {
3365 public:
3366 std::optional<FunctionProxy<bool(int32_t, int32_t, char*, char*, char)>> like;
3367
3368 data_t(GarbageCollectedData &&d) : GarbageCollectedData(std::move(d)) { }
3369 };
3370 auto &d = Module::Get().add_garbage_collected_data<data_t>(&_); // garbage collect the `data_t` instance
3371
3372 M_insist('_' != escape_char and '%' != escape_char, "illegal escape character");
3373
3374 if (_str.length() == 0 and _pattern.length() == 0) {
3375 _str.discard();
3376 _pattern.discard();
3377 return _Boolx1(true);
3378 }
3379
3380 auto like_non_null = [&d, &_str, &_pattern, &escape_char](Ptr<Charx1> str, Ptr<Charx1> pattern) -> Boolx1 {
3381 Wasm_insist(str.clone().not_null(), "string operand must not be NULL");
3382 Wasm_insist(pattern.clone().not_null(), "pattern operand must not be NULL");
3383
3384 if (not d.like) {
3385 /*----- Create function to compute the result. -----*/
3386 FUNCTION(like, bool(int32_t, int32_t, char*, char*, char))
3387 {
3388 auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function
3389
3390 const auto len_ty_str = PARAMETER(0);
3391 const auto len_ty_pattern = PARAMETER(1);
3392 const auto val_str = PARAMETER(2);
3393 const auto val_pattern = PARAMETER(3);
3394 const auto escape_char = PARAMETER(4);
3395
3396 /*----- Allocate memory for the dynamic programming table. -----*/
3397                /* Invariant: dp[i][j] == true iff val_pattern[:i] contains val_str[:j]. The entry for
3398                 * row i and column j is located at dp + (i - 1) * (`length_str` + 1) + (j - 1). */
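                /* E.g. with len_ty_str = 2 and len_ty_pattern = 2 the table has (2+1) * (2+1) = 9
                 * entries; row 0 and column 0 represent the empty prefixes of str and pattern. */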
3399 I32x1 num_entries = (len_ty_str + 1) * (len_ty_pattern + 1);
3400 const Var<Ptr<Boolx1>> dp = Module::Allocator().malloc<bool>(num_entries.clone().make_unsigned());
3401
3402 /*----- Initialize table with all entries set to false. -----*/
3403 Var<Ptr<Boolx1>> entry(dp.val());
3404 WHILE (entry < dp + num_entries.clone()) {
3405 *entry = false;
3406 entry += 1;
3407 }
3408
3409 /*----- Reset entry pointer to first entry. -----*/
3410 entry = dp.val();
3411
3412 /*----- Create pointers to track locations of current characters of `val_str` and `val_pattern`. -----*/
3413 Var<Ptr<Charx1>> str(val_str);
3414 Var<Ptr<Charx1>> pattern(val_pattern);
3415
3416 /*----- Compute ends of str and pattern. -----*/
3417                /* Create constant local variables to ensure correct pointers since `str` and `pattern` will change. */
3418 const Var<Ptr<Charx1>> end_str(str + len_ty_str);
3419 const Var<Ptr<Charx1>> end_pattern(pattern + len_ty_pattern);
3420
3421 /*----- Create variables for the current byte of str and pattern. -----*/
3422 Var<Charx1> byte_str, byte_pattern; // always loaded before first access
3423
3424 /*----- Initialize first column. -----*/
3425 /* Iterate until current byte of pattern is not a `%`-wildcard and set the respective entries to true. */
3426 DO_WHILE (byte_pattern == '%') {
3427 byte_pattern = Select(pattern < end_pattern, *pattern, '\0');
3428 *entry = true;
3429 entry += len_ty_str + 1;
3430 pattern += 1;
3431 }
3432
3433 /*----- Compute entire table. -----*/
3434 /* Create variable for the actual length of str. */
3435 Var<I32x1> len_str(0);
3436
3437                /* Create a flag indicating whether the current byte of pattern is not escaped. */
3438 Var<Boolx1> is_not_escaped(true);
3439
3440 /* Reset entry pointer to second row and second column. */
3441 entry = dp + len_ty_str + 2;
3442
3443 /* Reset pattern to first character. */
3444 pattern = val_pattern;
3445
3446 /* Load first byte from pattern if in bounds. */
3447 byte_pattern = Select(pattern < end_pattern, *pattern, '\0');
3448
3449 /* Create loop iterating as long as the current byte of pattern is not NUL. */
3450 WHILE (byte_pattern != '\0') {
3451 /* If current byte of pattern is not escaped and equals `escape_char`, advance pattern to next
3452 * byte and load it. Additionally, mark this byte as escaped and check for invalid escape
3453 * sequences. */
3454 IF (is_not_escaped and byte_pattern == escape_char) {
3455 pattern += 1;
3456 byte_pattern = Select(pattern < end_pattern, *pattern, '\0');
3457
3458 /* Check whether current byte of pattern is a validly escaped character, i.e. `_`, `%` or
3459 * `escape_char`. If not, throw an exception. */
3460 IF (byte_pattern != '_' and byte_pattern != '%' and byte_pattern != escape_char) {
3461 Throw(exception::invalid_escape_sequence);
3462 };
3463
3464 is_not_escaped = false;
3465 };
3466
3467 /* Reset actual length of str. */
3468 len_str = 0;
3469
3470 /* Load first byte from str if in bounds. */
3471 byte_str = Select(str < end_str, *str, '\0');
3472
3473 /* Create loop iterating as long as the current byte of str is not NUL. */
3474 WHILE (byte_str != '\0') {
3475 /* Increment actual length of str. */
3476 len_str += 1;
3477
3478 IF (is_not_escaped and byte_pattern == '%') {
3479 /* Store disjunction of above and left entry. */
3480 *entry = *(entry - (len_ty_str + 1)) or *(entry - 1);
3481 } ELSE {
3482 IF ((is_not_escaped and byte_pattern == '_') or byte_pattern == byte_str) {
3483 /* Store above left entry. */
3484 *entry = *(entry - (len_ty_str + 2));
3485 };
3486 };
3487
3488 /* Advance entry pointer to next entry, advance str to next byte, and load next byte from str
3489 * if in bounds. */
3490 entry += 1;
3491 str += 1;
3492 byte_str = Select(str < end_str, *str, '\0');
3493 }
3494
3495 /* Advance entry pointer to second column in the next row, reset str to first character, advance
3496 * pattern to next byte, load next byte from pattern if in bounds, and reset is_not_escaped to
3497 * true. */
3498 entry += len_ty_str + 1 - len_str;
3499 str = val_str;
3500 pattern += 1;
3501 byte_pattern = Select(pattern < end_pattern, *pattern, '\0');
3502 is_not_escaped = true;
3503 }
3504
3505 /*----- Compute result. -----*/
3506                /* The entry pointer currently points to the second column in the first row after the
3507                 * pattern has ended. Therefore, we have to go one row up and len_str - 1 columns to the
3508                 * right, i.e. the result is located at entry - (`length_str` + 1) + len_str - 1 = entry + len_str - (`length_str` + 2). */
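                /* E.g. for len_ty_str = 3 and an actual len_str = 2, the result lies at
                 * entry + 2 - 5 = entry - 3. */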
3509 const Var<Boolx1> result(*(entry + len_str - (len_ty_str + 2)));
3510
3511 /*----- Free allocated space. -----*/
3512 Module::Allocator().free(dp, num_entries.make_unsigned());
3513
3514 RETURN(result);
3515 }
3516
3517 d.like = std::move(like);
3518 }
3519
3520 /*----- Call like function. ------*/
3521 M_insist(bool(d.like));
3522 return (*d.like)(_str.length(), _pattern.length(), str, pattern, escape_char);
3523 };
3524
3525 if (_str.can_be_null() or _pattern.can_be_null()) {
3526 auto [_val_str, is_null_str] = _str.split();
3527 auto [_val_pattern, is_null_pattern] = _pattern.split();
3528 Ptr<Charx1> val_str(_val_str), val_pattern(_val_pattern); // since structured bindings cannot be used in lambda capture
3529
3530 _Var<Boolx1> result; // always set here
3531 IF (is_null_str or is_null_pattern) {
3532 result = _Boolx1::Null();
3533 } ELSE {
3534 result = like_non_null(val_str, val_pattern);
3535 };
3536 return result;
3537 } else {
3538 const Var<Boolx1> result(like_non_null(_str, _pattern)); // to prevent duplicated computation due to `clone()`
3539 return _Boolx1(result);
3540 }
3541}
3542
3543_Boolx1 m::wasm::like_contains(NChar _str, const ThreadSafePooledString &_pattern)
3544{
3545 static thread_local struct {} _; // unique caller handle
3546 struct data_t : GarbageCollectedData
3547 {
3548 public:
3550 std::unordered_map<ThreadSafePooledString, FunctionProxy<bool(int32_t, char*)>> contains_map;
3551
3552 data_t(GarbageCollectedData &&d) : GarbageCollectedData(std::move(d)) { }
3553 };
3554 auto &d = Module::Get().add_garbage_collected_data<data_t>(&_); // garbage collect the `data_t` instance
3555
3556 M_insist(std::regex_match(*_pattern, std::regex("%[^_%\\\\]+%")), "invalid contains pattern");
3557
3558 if (_str.length() == 0) {
3559 _str.discard();
3560 return _Boolx1(false);
3561 }
3562
3563 auto contains_non_null = [&d, &_str, &_pattern](Ptr<Charx1> str) -> Boolx1 {
3564 Wasm_insist(str.clone().not_null(), "string operand must not be NULL");
3565
3566 auto it = d.contains_map.find(_pattern);
3567 if (it == d.contains_map.end()) {
3568 /*----- Create function to compute the result. -----*/
3569 FUNCTION(contains, bool(int32_t, char*))
3570 {
3571 auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function
3572
3573 const auto len_ty_str = PARAMETER(0);
3574 auto val_str = PARAMETER(1);
3575
3576 /*----- Copy pattern without enclosing `%` to make it accessible with runtime offset. -----*/
3577 const int32_t len_pattern = strlen(*_pattern) - 2; // minus 2 due to enclosing `%`
3578 auto pattern = Module::Allocator().raw_malloc<char>(len_pattern);
3579 for (std::size_t i = 0; i < len_pattern; ++i)
3580 pattern[i] = (*_pattern)[i + 1]; // access _pattern with offset +1 due to starting `%`
3581
3582 /*----- Precompute prefix table. -----*/
3583 auto tbl = Module::Allocator().raw_malloc<int32_t>(len_pattern + 1);
3584 int32_t len_prefix = -1;
3585
3586 tbl[0] = len_prefix;
3587 for (std::size_t i = 1; i < len_pattern + 1; ++i) {
3588 while (len_prefix >= 0 and pattern[len_prefix] != pattern[i - 1])
3589 len_prefix = tbl[len_prefix];
3590 ++len_prefix;
3591 tbl[i] = len_prefix;
3592 }
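                /* E.g. for pattern "abab" the table is tbl = { -1, 0, 0, 1, 2 }: tbl[i] is the length of
                 * the longest proper prefix of pattern[:i] that is also a suffix thereof (the KMP failure
                 * function). */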
3593
3594 /*----- Search pattern in string. -----*/
3595 const Var<Ptr<Charx1>> end_str(val_str + len_ty_str);
3596 Var<I32x1> pos_pattern(0);
3597 WHILE (val_str < end_str and *val_str != '\0') {
3598 WHILE(pos_pattern >= 0 and *val_str != *(Ptr<Charx1>(pattern) + pos_pattern)) {
3599 Wasm_insist(pos_pattern < len_pattern + 1);
3600 pos_pattern = *(Ptr<I32x1>(tbl) + pos_pattern);
3601 }
3602 val_str += 1;
3603 pos_pattern += 1;
3604 IF (pos_pattern == len_pattern) {
3605 RETURN(true);
3606 };
3607 }
3608 RETURN(false);
3609 }
3610 it = d.contains_map.emplace_hint(it, _pattern, std::move(contains));
3611 }
3612
3613 /*----- Call contains function. ------*/
3614 M_insist(it != d.contains_map.end());
3615 return (it->second)(_str.length(), str);
3616 };
3617
3618 if (_str.can_be_null()) {
3619 auto [_val_str, is_null_str] = _str.split();
3620 Ptr<Charx1> val_str(_val_str); // since structured bindings cannot be used in lambda capture
3621
3622 _Var<Boolx1> result; // always set here
3623 IF (is_null_str) {
3624 result = _Boolx1::Null();
3625 } ELSE {
3626 result = contains_non_null(val_str);
3627 };
3628 return result;
3629 } else {
3630 const Var<Boolx1> result(contains_non_null(_str)); // to prevent duplicated computation due to `clone()`
3631 return _Boolx1(result);
3632 }
3633}
3634
3635_Boolx1 m::wasm::like_prefix(NChar str, const ThreadSafePooledString &pattern)
3636{
3637 M_insist(std::regex_match(*pattern, std::regex("[^_%\\\\]+%")), "invalid prefix pattern");
3638
3639 /*----- Create lower bound. -----*/
3640 const int32_t len_pattern = strlen(*pattern) - 1; // minus 1 due to ending `%`
3641 auto _lower_bound = Module::Allocator().raw_malloc<char>(len_pattern + 1);
3642 for (std::size_t i = 0; i < len_pattern; ++i)
3643 _lower_bound[i] = (*pattern)[i];
3644 _lower_bound[len_pattern] = '\0';
3645 NChar lower_bound(Ptr<Charx1>(_lower_bound), false, len_pattern, true);
3646
3647 /*----- Create upper bound. -----*/
3648 auto _upper_bound = Module::Allocator().raw_malloc<char>(len_pattern + 1);
3649 for (std::size_t i = 0; i < len_pattern - 1; ++i)
3650 _upper_bound[i] = (*pattern)[i];
3651 const char last_char = (*pattern)[len_pattern - 1];
3652 _upper_bound[len_pattern - 1] = last_char + 1; // increment last character for upper bound
3653 _upper_bound[len_pattern] = '\0';
3654 NChar upper_bound(Ptr<Charx1>(_upper_bound), false, len_pattern, true);
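    /* E.g. pattern "abc%" yields lower bound "abc" and upper bound "abd": a string has the prefix "abc"
     * iff "abc" <= str < "abd". */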
3655
3656    /*----- Compute result by checking whether the given string lies in the created interval. -----*/
3657 auto str_cpy = str.clone();
3658 return strcmp(str_cpy, lower_bound, GE) and strcmp(str, upper_bound, LT);
3659}
3660
3661_Boolx1 m::wasm::like_suffix(NChar str, const ThreadSafePooledString &pattern)
3662{
3663 M_insist(std::regex_match(*pattern, std::regex("%[^_%\\\\]+")), "invalid suffix pattern");
3664
3665 /*----- Create lower bound. -----*/
3666 const int32_t len_pattern = strlen(*pattern) - 1; // minus 1 due to starting `%`
3667 auto _lower_bound = Module::Allocator().raw_malloc<char>(len_pattern + 1);
3668 for (std::size_t i = 0; i < len_pattern; ++i)
3669 _lower_bound[i] = (*pattern)[i + 1]; // access pattern with offset +1 due to starting `%`
3670 _lower_bound[len_pattern] = '\0';
3671 NChar lower_bound(Ptr<Charx1>(_lower_bound), false, len_pattern, true);
3672
3673 /*----- Create upper bound. -----*/
3674 auto _upper_bound = Module::Allocator().raw_malloc<char>(len_pattern + 1);
3675 const char first_char = (*pattern)[1]; // access first character at offset 1 due to starting `%`
3676 _upper_bound[0] = first_char + 1; // increment first character for upper bound
3677 for (std::size_t i = 1; i < len_pattern; ++i)
3678 _upper_bound[i] = (*pattern)[i + 1]; // access pattern with offset +1 due to starting `%`
3679 _upper_bound[len_pattern] = '\0';
3680 NChar upper_bound(Ptr<Charx1>(_upper_bound), false, len_pattern, true);
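    /* E.g. pattern "%xyz" yields lower bound "xyz" and upper bound "yyz"; the reversed comparisons below
     * effectively compare both strings from their last character towards their first. */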
3681
3682    /*----- Compute result by checking whether the given string lies in the created interval when compared reversed. -----*/
3683 const auto max_length = std::max<uint32_t>(str.length(), len_pattern); // use maximal length due to reversed strncmp
3684 auto str_cpy = str.clone();
3685 return strncmp(str_cpy, lower_bound, U32x1(max_length), GE, true) and
3686 strncmp(str, upper_bound, U32x1(max_length), LT, true);
3687}
3688
3689
3690/*======================================================================================================================
3691 * comparator
3692 *====================================================================================================================*/
3693
3694template<bool Predicated>
3695I32x1 m::wasm::compare(const Environment &env_left, const Environment &env_right,
3696 const std::vector<SortingOperator::order_type> &order)
3697{
3698 if constexpr (Predicated) {
3699 Var<I32x1> result(0); // explicitly (re-)set result to 0
3700
3701 /*----- Compile ordering. -----*/
3702 for (auto &o : order) {
3703 /*----- Compile order expression for left tuple. -----*/
3704 SQL_t _val_left = env_left.template compile(o.first);
3705
3706 std::visit(overloaded {
3707 [&]<typename T>(Expr<T> val_left) -> void {
3708 /*----- Compile order expression for right tuple. -----*/
3709 Expr<T> val_right = env_right.template compile<Expr<T>>(o.first);
3710
3711 M_insist(val_left.can_be_null() == val_right.can_be_null(),
3712                             "either both or none of the values to compare must be nullable");
3713 if (val_left.can_be_null()) {
3714 using type = std::conditional_t<std::is_same_v<T, bool>, _I32x1, Expr<T>>;
3715 Var<type> left, right;
3716 if constexpr (std::is_same_v<T, bool>) {
3717 left = val_left.template to<int32_t>();
3718 right = val_right.template to<int32_t>();
3719 } else {
3720 left = val_left;
3721 right = val_right;
3722 }
3723
3724 /*----- Compare both with current order expression and update result. -----*/
3725 I32x1 cmp_null = right.is_null().template to<int32_t>() - left.is_null().template to<int32_t>();
3726 _I32x1 _val_lt = (left < right).template to<int32_t>();
3727 _I32x1 _val_gt = (left > right).template to<int32_t>();
3728 _I32x1 _cmp_val = o.second ? _val_gt - _val_lt : _val_lt - _val_gt;
3729 auto [cmp_val, cmp_is_null] = _cmp_val.split();
3730 cmp_is_null.discard();
3731 I32x1 cmp = (cmp_null << 1) + cmp_val; // potentially-null value of comparison is overruled by cmp_null
3732 result <<= 2; // shift result s.t. first difference will determine order
3733 result += cmp; // add current comparison to result
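                        /* Since cmp_null and cmp_val both lie in [-1,1], cmp lies in [-3,3]; shifting
                         * result by 2 bits gives every earlier order key a weight of 4 > |cmp|, so the
                         * first differing key determines the final order. */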
3734 } else {
3735 using type = std::conditional_t<std::is_same_v<T, bool>, I32x1, PrimitiveExpr<T>>;
3736 Var<type> left, right;
3737 if constexpr (std::is_same_v<T, bool>) {
3738 left = val_left.insist_not_null().template to<int32_t>();
3739 right = val_right.insist_not_null().template to<int32_t>();
3740 } else {
3741 left = val_left.insist_not_null();
3742 right = val_right.insist_not_null();
3743 }
3744
3745 /*----- Compare both with current order expression and update result. -----*/
3746 I32x1 val_lt = (left < right).template to<int32_t>();
3747 I32x1 val_gt = (left > right).template to<int32_t>();
3748 I32x1 cmp = o.second ? val_gt - val_lt : val_lt - val_gt;
3749 result <<= 1; // shift result s.t. first difference will determine order
3750 result += cmp; // add current comparison to result
3751 }
3752 },
3753 [&](NChar val_left) -> void {
3754 auto &cs = as<const CharacterSequence>(*o.first.get().type());
3755
3756 /*----- Compile order expression for right tuple. -----*/
3757 NChar val_right = env_right.template compile<NChar>(o.first);
3758
3759 Var<Ptr<Charx1>> _left(val_left.val()), _right(val_right.val());
3760 NChar left(_left, val_left.can_be_null(), val_left.length(), val_left.guarantees_terminating_nul()),
3761 right(_right, val_right.can_be_null(), val_right.length(), val_right.guarantees_terminating_nul());
3762
3763 M_insist(val_left.can_be_null() == val_right.can_be_null(),
3764                             "either both or none of the values to compare must be nullable");
3765 if (val_left.can_be_null()) {
3766 /*----- Compare both with current order expression and update result. -----*/
3767 I32x1 cmp_null = _right.is_null().to<int32_t>() - _left.is_null().to<int32_t>();
3768 _I32x1 _delta = o.second ? strcmp(left, right) : strcmp(right, left);
3769 auto [delta_val, delta_is_null] = _delta.split();
3770 Wasm_insist(delta_val.clone() >= -1 and delta_val.clone() <= 1,
3771 "result of strcmp is assumed to be in [-1,1]");
3772 delta_is_null.discard();
3773 I32x1 cmp = (cmp_null << 1) + delta_val; // potentially-null value of comparison is overruled by cmp_null
3774 result <<= 2; // shift result s.t. first difference will determine order
3775 result += cmp; // add current comparison to result
3776 } else {
3777 /*----- Compare both with current order expression and update result. -----*/
3778 I32x1 delta = o.second ? strcmp(left, right).insist_not_null()
3779 : strcmp(right, left).insist_not_null();
3780 Wasm_insist(delta.clone() >= -1 and delta.clone() <= 1,
3781 "result of strcmp is assumed to be in [-1,1]");
3782 result <<= 1; // shift result s.t. first difference will determine order
3783 result += delta; // add current comparison to result
3784 }
3785 },
3786 [](auto&&) -> void { M_unreachable("SIMDfication currently not supported"); },
3787 [](std::monostate) -> void { M_unreachable("invalid expression"); }
3788 }, _val_left);
3789 }
3790
3791 return result;
3792 } else {
3793 Var<I32x1> result; // always set here
3794
3795 /*----- Compile ordering. -----*/
3796 BLOCK(compare) {
3797 auto emit_comparison_rec = [&](decltype(order.cbegin()) curr, const decltype(order.cend()) end,
3798 auto &rec) -> void
3799 {
3800 /*----- If end of ordering is reached, left and right tuple are equal. -----*/
3801 if (curr == end) {
3802 result = 0;
3803 return;
3804 }
3805
3806 /*----- Compile order expression for left tuple. -----*/
3807 SQL_t _val_left = env_left.template compile(curr->first);
3808
3809 std::visit(overloaded {
3810 [&]<typename T>(Expr<T> val_left) -> void {
3811 /*----- Compile order expression for right tuple. -----*/
3812 Expr<T> val_right = env_right.template compile<Expr<T>>(curr->first);
3813
3814 M_insist(val_left.can_be_null() == val_right.can_be_null(),
3815                                     "either both or none of the values to compare must be nullable");
3816 if (val_left.can_be_null()) {
3817 using type = std::conditional_t<std::is_same_v<T, bool>, _I32x1, Expr<T>>;
3818 Var<type> _left, _right;
3819 if constexpr (std::is_same_v<T, bool>) {
3820 _left = val_left.template to<int32_t>();
3821 _right = val_right.template to<int32_t>();
3822 } else {
3823 _left = val_left;
3824 _right = val_right;
3825 }
3826
3827 /*----- Compare both with current order expression and potentially set result. -----*/
3828 IF (_left.not_null()) {
3829 IF (_right.is_null()) {
3830 result = 1;
3831 GOTO(compare);
3832 };
3833 auto left = _left.val().insist_not_null(),
3834 right = _right.val().insist_not_null();
3835 Boolx1 left_lt_right = curr->second ? left.clone() < right.clone()
3836 : left.clone() > right.clone();
3837 IF (left_lt_right) {
3838 result = -1;
3839 GOTO(compare);
3840 };
3841 Boolx1 left_gt_right = curr->second ? left > right : left < right;
3842 IF (left_gt_right) {
3843 result = 1;
3844 GOTO(compare);
3845 };
3846 } ELSE {
3847 IF (_right.not_null()) {
3848 result = -1;
3849 GOTO(compare);
3850 };
3851 };
3852 } else {
3853 using type = std::conditional_t<std::is_same_v<T, bool>, I32x1, PrimitiveExpr<T>>;
3854 Var<type> left, right;
3855 if constexpr (std::is_same_v<T, bool>) {
3856 left = val_left.insist_not_null().template to<int32_t>();
3857 right = val_right.insist_not_null().template to<int32_t>();
3858 } else {
3859 left = val_left.insist_not_null();
3860 right = val_right.insist_not_null();
3861 }
3862
3863 /*----- Compare both with current order expression and potentially set result. -----*/
3864 Boolx1 left_lt_right = curr->second ? left < right : left > right;
3865 IF (left_lt_right) {
3866 result = -1;
3867 GOTO(compare);
3868 };
3869 Boolx1 left_gt_right = curr->second ? left > right : left < right;
3870 IF (left_gt_right) {
3871 result = 1;
3872 GOTO(compare);
3873 };
3874 }
3875 },
3876 [&](NChar val_left) -> void {
3877 auto &cs = as<const CharacterSequence>(*curr->first.get().type());
3878
3879 /*----- Compile order expression for right tuple. -----*/
3880 NChar val_right = env_right.template compile<NChar>(curr->first);
3881
3882 Var<Ptr<Charx1>> _left(val_left.val()), _right(val_right.val());
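3883                        /* The NULL bits are checked explicitly below, hence construct the `NChar`s as non-nullable. */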
3884 NChar left(_left, false, val_left.length(), val_left.guarantees_terminating_nul()),
3885 right(_right, false, val_right.length(), val_right.guarantees_terminating_nul());
3886
3887 M_insist(val_left.can_be_null() == val_right.can_be_null(),
3888                                     "either both or none of the values to compare must be nullable");
3889 if (val_left.can_be_null()) {
3890 /*----- Compare both with current order expression and potentially set result. -----*/
3891 IF (_left.not_null()) {
3892 IF (_right.is_null()) {
3893 result = 1;
3894 GOTO(compare);
3895 };
3896 I32x1 cmp = curr->second ? strcmp(left, right).insist_not_null()
3897 : strcmp(right, left).insist_not_null();
3898 IF (cmp.clone() != 0) {
3899 result = cmp;
3900 GOTO(compare);
3901 };
3902 } ELSE {
3903 IF (_right.not_null()) {
3904 result = -1;
3905 GOTO(compare);
3906 };
3907 };
3908 } else {
3909 /*----- Compare both with current order expression and potentially set result. -----*/
3910 I32x1 cmp = curr->second ? strcmp(left, right).insist_not_null()
3911 : strcmp(right, left).insist_not_null();
3912 IF (cmp.clone() != 0) {
3913 result = cmp;
3914 GOTO(compare);
3915 };
3916 }
3917 },
3918 [](auto&&) -> void { M_unreachable("SIMDfication currently not supported"); },
3919 [](std::monostate) -> void { M_unreachable("invalid expression"); }
3920 }, _val_left);
3921
3922 /*----- Recurse to next comparison. -----*/
3923 rec(std::next(curr), end, rec);
3924 };
3925 emit_comparison_rec(order.cbegin(), order.cend(), emit_comparison_rec);
3926 }
3927
3928 /* GOTOs from above jump here */
3929
3930 return result;
3931 }
3932}
3933
3934// explicit instantiations to prevent linker errors
3935template I32x1 m::wasm::compare<false>(
3936 const Environment&, const Environment&, const std::vector<SortingOperator::order_type>&
3937);
3938template I32x1 m::wasm::compare<true>(
3939 const Environment&, const Environment&, const std::vector<SortingOperator::order_type>&
3940);
__attribute__((constructor(202))) static void register_interpreter()
#define id(X)
#define M_insist_no_ternary_logic()
Definition: WasmDSL.hpp:45
#define Wasm_insist(...)
Definition: WasmDSL.hpp:373
#define Throw(...)
Definition: WasmMacro.hpp:48
#define ELSE
Definition: WasmMacro.hpp:24
#define LOOP(...)
Definition: WasmMacro.hpp:30
#define WHILE(...)
Definition: WasmMacro.hpp:43
#define BLOCK(...)
Definition: WasmMacro.hpp:15
#define PARAMETER(IDX)
Definition: WasmMacro.hpp:20
#define IF(COND)
Definition: WasmMacro.hpp:23
#define FUNCTION(NAME, TYPE)
Definition: WasmMacro.hpp:17
#define DO_WHILE(...)
Definition: WasmMacro.hpp:37
#define BLOCK_OPEN(BLK)
Definition: WasmMacro.hpp:8
#define RETURN(RES)
Definition: WasmMacro.hpp:21
void convert_in_place(SQL_t &operand)
Convert operand of some SQL_t type to the target type.
Definition: WasmUtil.cpp:61
#define BINOP(OP)
#define CMPOP(OP, STRCMP_OP)
std::conditional_t< CanBeNull, _Bool< L >, Bool< L > > compile_cnf(ExprCompiler &C, const cnf::CNF &cnf)
Definition: WasmUtil.cpp:112
#define UNOP(OP)
#define CALL(TYPE)
void add(const char *group_name, const char *short_name, const char *long_name, const char *description, Callback &&callback)
Adds a new group option to the ArgParser.
Definition: ArgParser.hpp:84
#define M_unreachable(MSG)
Definition: macro.hpp:146
#define M_CONSTEXPR_COND(COND, IF_TRUE, IF_FALSE)
Definition: macro.hpp:54
#define M_insist(...)
Definition: macro.hpp:129
const Schema const Schema & tuple_schema
Definition: DataLayout.hpp:255
std::size_t get_num_simd_lanes(const DataLayout &layout, const Schema &layout_schema, const Schema &tuple_schema)
Returns the number of SIMD lanes used for accessing tuples of schema tuple_schema in SIMDfied manner ...
Definition: DataLayout.cpp:244
const Schema & layout_schema
Definition: DataLayout.hpp:255
_I32x1 strcmp(NChar left, NChar right, bool reverse=false)
Compares two strings left and right.
Definition: WasmUtil.cpp:3270
_Boolx1 like_prefix(NChar str, const ThreadSafePooledString &pattern)
Checks whether the string str has the prefix pattern.
Definition: WasmUtil.cpp:3635
_Boolx1 like_contains(NChar str, const ThreadSafePooledString &pattern)
Checks whether the string str contains the pattern pattern.
Definition: WasmUtil.cpp:3543
_I32x1 strncmp(NChar left, NChar right, U32x1 len, bool reverse=false)
Compares two strings left and right.
Definition: WasmUtil.cpp:3102
std::variant< std::monostate #define ADD_TYPE(TYPE) SQL_TYPES(ADD_TYPE) > SQL_t
Definition: WasmUtil.hpp:397
void compile_data_layout_point_access(const Schema &_tuple_value_schema, const Schema &_tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, const Schema &layout_schema, U32x1 tuple_id)
Compiles the data layout layout starting at memory address base_address and containing tuples of sche...
Definition: WasmUtil.cpp:1946
Ptr< Charx1 > strncpy(Ptr< Charx1 > dst, Ptr< Charx1 > src, U32x1 count)
Copies the contents of src to dst, but no more than count characters.
Definition: WasmUtil.cpp:3311
template I32x1 compare< true >(const Environment &, const Environment &, const std::vector< SortingOperator::order_type > &)
typename detail::_var_helper< T >::type _Var
Local variable that can always be NULL.
Definition: WasmDSL.hpp:5784
Bool< L > T mask
Definition: WasmUtil.hpp:1325
typename detail::var_helper< T >::type Var
Local variable.
Definition: WasmDSL.hpp:5779
std::tuple< Block, Block, Block > compile_store_sequential(const Schema &tuple_value_schema, const Schema &tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, std::size_t num_simd_lanes, const Schema &layout_schema, Variable< uint32_t, Kind, false > &tuple_id)
Compiles the data layout layout containing tuples of schema layout_schema such that it sequentially s...
std::variant< std::monostate #define ADD_TYPE(TYPE) SQL_ADDR_TYPES(ADD_TYPE) > SQL_addr_t
Definition: WasmUtil.hpp:404
std::tuple< Block, Block, Block > compile_store_sequential_single_pass(const Schema &tuple_value_schema, const Schema &tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, std::size_t num_simd_lanes, const Schema &layout_schema, Variable< uint32_t, Kind, false > &tuple_id)
Compiles the data layout layout containing tuples of schema layout_schema such that it sequentially s...
PrimitiveExpr< bool, L > eqz() and(L
auto make_signed()
Conversion of a PrimitiveExpr<T, L> to a PrimitiveExpr<std::make_signed_t<T>, L>.
Definition: WasmDSL.hpp:3650
void GOTO(const Block &block)
Jumps to the end of block.
Definition: WasmDSL.hpp:6199
void compile_load_point_access(const Schema &tuple_value_schema, const Schema &tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, const Schema &layout_schema, U32x1 tuple_id)
Compiles the data layout layout starting at memory address base_address and containing tuples of sche...
Definition: WasmUtil.cpp:2480
template I32x1 compare< false >(const Environment &, const Environment &, const std::vector< SortingOperator::order_type > &)
and
Constructs a new PrimitiveExpr from a constant value.
Definition: WasmDSL.hpp:1519
std::size_t L
Definition: WasmDSL.hpp:528
void compile_store_point_access(const Schema &tuple_value_schema, const Schema &tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, const Schema &layout_schema, U32x1 tuple_id)
Compiles the data layout layout starting at memory address base_address and containing tuples of sche...
Definition: WasmUtil.cpp:2472
typename detail::global_helper< T >::type Global
Global variable.
Definition: WasmDSL.hpp:5789
Bool< L > value
Definition: WasmUtil.hpp:1317
Bool< L > is_null(SQL_t &variant)
Definition: WasmUtil.hpp:461
auto Select(C &&_cond, T &&_tru, U &&_fals)
Definition: WasmDSL.hpp:6215
_Boolx1 like(NChar str, NChar pattern, const char escape_char='\\')
Checks whether the string str matches the pattern pattern regarding SQL LIKE semantics using escape c...
Definition: WasmUtil.cpp:3360
std::tuple< Block, Block, Block > compile_load_sequential(const Schema &tuple_value_schema, const Schema &tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, std::size_t num_simd_lanes, const Schema &layout_schema, Variable< uint32_t, Kind, false > &tuple_id)
Compiles the data layout layout containing tuples of schema layout_schema such that it sequentially l...
Bool< L > uint8_t n
Definition: WasmUtil.hpp:1318
void discard()
Discards this.
Definition: WasmDSL.hpp:1588
PrimitiveExpr< uint64_t, L > L L L L U
Definition: WasmDSL.hpp:2352
_Boolx1 like_suffix(NChar str, const ThreadSafePooledString &pattern)
Checks whether the string str has the suffix pattern.
Definition: WasmUtil.cpp:3661
for(std::size_t idx=1;idx< num_vectors;++idx) res.emplace((vectors_[idx].bitmask()<< uint32_t(idx *vector_type return * res
Definition: WasmDSL.hpp:3696
auto op
Definition: WasmDSL.hpp:2384
std::variant< std::monostate, _Boolx1, _Boolx16, _Boolx32 > SQL_boolean_t
Definition: WasmUtil.hpp:409
std::size_t bool
Definition: WasmDSL.hpp:528
I32x1 compare(const Environment &env_left, const Environment &env_right, const std::vector< SortingOperator::order_type > &order)
Compares two tuples, which must be already loaded into the environments env_left and env_right,...
Definition: WasmUtil.cpp:3695
void CONTINUE(std::size_t level=1)
Definition: WasmDSL.hpp:6187
std::pair<::wasm::Expression *, std::list< std::shared_ptr< Bit > > > move()
Moves the underlying Binaryen ::wasm::Expression and the referenced bits out of this.
Definition: WasmDSL.hpp:1567
typename uint< W >::type uint_t
Definition: WasmDSL.hpp:340
PrimitiveExpr clone() const
Creates and returns a deep copy of this.
Definition: WasmDSL.hpp:1577
cmp_op
‍comparison operations, e.g. for string comparison
Definition: WasmUtil.hpp:1336
void BREAK(std::size_t level=1)
Definition: WasmDSL.hpp:6176
static constexpr std::size_t num_simd_lanes
‍the number of SIMD lanes of the represented expression, i.e. 1 for scalar and at least 2 for vectori...
Definition: WasmDSL.hpp:1466
‍mutable namespace
Definition: Backend.hpp:10
M_EXPORT constexpr bool is_pow_2(T n)
Definition: fn.hpp:129
std::function< void(void)> pipeline_t
bool M_EXPORT contains(const H &haystack, const N &needle)
Checks whether haystack contains needle.
Definition: fn.hpp:383
void swap(PlanTableBase< Actual > &first, PlanTableBase< Actual > &second)
Definition: PlanTable.hpp:394
T(x)
ThreadSafeStringPool::proxy_type ThreadSafePooledString
Definition: Pool.hpp:464
and
Definition: enum_ops.hpp:12
bool M_EXPORT like(const std::string &str, const std::string &pattern, const char escape_char='\\')
Compares a SQL-style LIKE pattern with the given std::string.
Definition: fn.cpp:68
and arithmetic< U > and same_signedness< T, U > U
Definition: concepts.hpp:90
std::string interpret(const std::string &str, char esc='\\', char quote='"')
Definition: fn.hpp:319
void M_EXPORT setbit(T *bytes, bool value, uint32_t n)
Definition: fn.hpp:442
auto visit(Callable &&callable, Base &obj, m::tag< Callable > &&=m::tag< Callable >())
Generic implementation to visit a class hierarchy, with similar syntax as std::visit.
Definition: Visitor.hpp:138
‍command-line options for the HeuristicSearchPlanEnumerator
Definition: V8Engine.cpp:44
STL namespace.
The boolean type.
Definition: Type.hpp:230
The catalog contains all Databases and keeps track of all meta information of the database system.
Definition: Catalog.hpp:215
ThreadSafePooledString pool(const char *str) const
Creates an internalized copy of the string str by adding it to the internal StringPool.
Definition: Catalog.hpp:274
static Catalog & Get()
Return a reference to the single Catalog instance.
m::ArgParser & arg_parser()
Definition: Catalog.hpp:253
The type of character strings, both fixed length and varying length.
Definition: Type.hpp:290
The date type.
Definition: Type.hpp:364
The date type.
Definition: Type.hpp:335
fnid_t fnid
the function id
Definition: Schema.hpp:841
A Type that represents the absence of any other type.
Definition: Type.hpp:204
The numeric type represents integer and floating-point types of different precision and scale.
Definition: Type.hpp:393
uint64_t size() const override
Compute the size in bits of an instance of this type.
Definition: Type.hpp:433
Pooled< T, Pool, false > assert_not_none() const
Definition: Pool.hpp:239
An Identifier is composed of a name and an optional prefix.
Definition: Schema.hpp:42
A Schema represents a sequence of identifiers, optionally with a prefix, and their associated types.
Definition: Schema.hpp:39
std::size_t num_entries() const
Returns the number of entries in this Schema.
Definition: Schema.hpp:124
const_iterator cend() const
Definition: Schema.hpp:121
Schema deduplicate() const
Returns a deduplicated version of this Schema, i.e.
Definition: Schema.hpp:190
iterator find(const Identifier &id)
Returns an iterator to the entry with the given Identifier id, or end() if no such entry exists.
Definition: Schema.hpp:129
Schema drop_constants() const
Returns a copy of this Schema where all constant entries are removed.
Definition: Schema.hpp:200
bool is_none() const
Definition: Type.hpp:72
A binary expression.
Definition: AST.hpp:348
std::unique_ptr< Expr > lhs
Definition: AST.hpp:349
const Numeric * common_operand_type
Definition: AST.hpp:351
std::unique_ptr< Expr > rhs
Definition: AST.hpp:350
Token op() const
Definition: AST.hpp:377
A constant: a string literal or a numeric constant.
Definition: AST.hpp:213
A designator.
Definition: AST.hpp:134
Token table_name
Definition: AST.hpp:138
Token attr_name
Definition: AST.hpp:139
The error expression.
Definition: AST.hpp:116
const Type * type() const
Returns the Type of this Expr.
Definition: AST.hpp:58
A function application.
Definition: AST.hpp:246
std::vector< std::unique_ptr< Expr > > args
Definition: AST.hpp:250
const Function & get_function() const
Definition: AST.hpp:312
A query expression for nested queries.
Definition: AST.hpp:389
const ThreadSafePooledString & alias() const
Definition: AST.hpp:421
TokenType type
Definition: Token.hpp:17
ThreadSafePooledOptionalString text
declared as optional for dummy tokens
Definition: Token.hpp:16
A unary expression: "+e", "-e", "~e", "NOT e".
Definition: AST.hpp:324
Token op() const
Definition: AST.hpp:336
std::unique_ptr< Expr > expr
Definition: AST.hpp:325
A CNF represents a conjunction of cnf::Clauses.
Definition: CNF.hpp:134
bool can_be_null() const
Returns true iff this CNF formula is nullable, i.e.
Definition: CNF.hpp:155
This is an interface for factories that compute particular DataLayouts for a given sequence of Types,...
virtual size_type num_tuples() const =0
‍returns the number of tuples represented by an instance of this node
Models how data is laid out in a linear address space.
Definition: DataLayout.hpp:29
bool is_finite() const
Returns true iff this DataLayout lays out a finite sequence of tuples.
Definition: DataLayout.hpp:200
uint64_t stride_in_bits() const
Returns the stride (in bits) of the single child of the DataLayout.
Definition: DataLayout.hpp:207
const Node & child() const
Returns a reference to the single child of this DataLayout.
Definition: DataLayout.hpp:209
size_type num_tuples() const
Returns the number of tuples laid out by this DataLayout; must not be called when not is_finite().
Definition: DataLayout.hpp:202
std::vector< level_info_t > level_info_stack_t
Definition: DataLayout.hpp:53
void for_sibling_leaves(callback_leaves_t callback) const
Definition: DataLayout.cpp:161
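A hedged sketch over the DataLayout accessors above; the layout is assumed to come from a DataLayoutFactory:
void describe(const storage::DataLayout &layout)
{
    if (layout.is_finite()) {
        auto n = layout.num_tuples(); // only legal for finite layouts
        (void) n;
    }
    auto stride = layout.stride_in_bits(); // stride of the layout's single child
    auto &child = layout.child(); // the single child node
    (void) stride; (void) child; // silence unused warnings in this sketch
}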
Represents a code block, i.e. a sequence of instructions.
Definition: WasmDSL.hpp:1005
void attach_to_current()
Attaches this Block to the wasm::Block currently active in the Module.
Definition: WasmDSL.hpp:1084
bool empty() const
Returns whether this Block is empty, i.e. contains no instructions.
Definition: WasmDSL.hpp:1075
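A hedged two-liner, assuming an existing wasm::Block blk:
if (not blk.empty()) // nothing to emit for an empty block
    blk.attach_to_current(); // splice blk into the Module's currently active block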
Buffers tuples by materializing them into memory.
Definition: WasmUtil.hpp:1070
buffer_load_proxy_t< IsGlobal > create_load_proxy(param_t tuple_value_schema=param_t(), param_t tuple_addr_schema=param_t()) const
Creates and returns a proxy object to load value tuples of schema tuple_value_schema (default: entire...
Definition: WasmUtil.cpp:2535
void execute_pipeline_inline(setup_t setup, pipeline_t pipeline, teardown_t teardown, param_t tuple_value_schema=param_t(), param_t tuple_addr_schema=param_t()) const
Emits code inline to execute the given pipeline pipeline for each value tuple of schema tuple_value_s...
Definition: WasmUtil.cpp:2833
void resume_pipeline(param_t tuple_value_schema=param_t(), param_t tuple_addr_schema=param_t()) const
Emits code into a separate function to resume the pipeline for each value tuple of schema tuple_value...
Definition: WasmUtil.cpp:2679
Buffer(const Schema &schema, const storage::DataLayoutFactory &factory, bool load_simdfied=false, std::size_t num_tuples=0, setup_t setup=setup_t::Make_Without_Parent(), pipeline_t pipeline=pipeline_t(), teardown_t teardown=teardown_t::Make_Without_Parent())
Creates a buffer for num_tuples tuples (0 means infinite) of schema schema using the data layout crea...
Definition: WasmUtil.cpp:2494
void execute_pipeline(setup_t setup, pipeline_t pipeline, teardown_t teardown, param_t tuple_value_schema=param_t(), param_t tuple_addr_schema=param_t()) const
Emits code into a separate function to execute the given pipeline pipeline for each value tuple of sch...
Definition: WasmUtil.cpp:2761
const Schema & schema() const
Returns the schema of the buffer.
Definition: WasmUtil.hpp:1106
void consume()
Emits code to store the current tuple into the buffer.
Definition: WasmUtil.cpp:2910
buffer_swap_proxy_t< IsGlobal > create_swap_proxy(param_t tuple_schema=param_t()) const
Creates and returns a proxy object to swap tuples of schema tuple_schema (default: entire tuples) in ...
Definition: WasmUtil.cpp:2570
std::optional< std::reference_wrapper< const Schema > > param_t
parameter type for proxy creation and pipeline resuming methods
Definition: WasmUtil.hpp:1073
void setup()
Performs the setup of all local variables of this buffer (by reading them from the global backups iff...
Definition: WasmUtil.cpp:2583
storage::DataLayout layout_
data layout of buffer
Definition: WasmUtil.hpp:1076
buffer_storage< IsGlobal > storage_
if IsGlobal, contains backups for base address, capacity, and size
Definition: WasmUtil.hpp:1082
buffer_store_proxy_t< IsGlobal > create_store_proxy(param_t tuple_schema=param_t()) const
Creates and returns a proxy object to store tuples of schema tuple_schema (default: entire tuples) to...
Definition: WasmUtil.cpp:2557
void resume_pipeline_inline(param_t tuple_value_schema=param_t(), param_t tuple_addr_schema=param_t()) const
Emits code inline to resume the pipeline for each value tuple of schema tuple_value_schema (default: ...
Definition: WasmUtil.cpp:2755
void teardown()
Performs the teardown of all local variables of this buffer (by storing them into the global backups ...
Definition: WasmUtil.cpp:2642
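A hedged end-to-end sketch of the Buffer interface above; schema and factory are assumed to be provided by the surrounding operator, and all pipeline callbacks are left at their defaults:
Buffer<false> buf(schema, factory); // local buffer (IsGlobal = false), unbounded (num_tuples = 0)
buf.setup(); // initialize the buffer's local state
/* for each incoming tuple: */
buf.consume(); // materialize the current tuple into the buffer
buf.teardown(); // write local state back
buf.resume_pipeline(); // emit a function replaying the pipeline per buffered tuple
auto load = buf.create_load_proxy(); // proxy to load entire tuples back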
std::size_t num_simd_lanes() const
Returns the number of SIMD lanes used.
Definition: WasmUtil.hpp:939
std::size_t num_simd_lanes_preferred() const
Returns the number of SIMD lanes preferred by other operators.
Definition: WasmUtil.hpp:944
static thread_local std::unique_ptr< CodeGenContext > the_context_
Definition: WasmUtil.hpp:878
Environment & env()
Returns the current Environment.
Definition: WasmUtil.hpp:905
void set_num_simd_lanes(std::size_t n)
Sets the number of SIMD lanes used to n.
Definition: WasmUtil.hpp:941
static CodeGenContext & Get()
Definition: WasmUtil.hpp:889
Scope scoped_environment()
Creates a new, scoped Environment.
Definition: WasmUtil.hpp:897
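A hedged sketch of the CodeGenContext accessors above; the lane count 4 is an illustrative choice:
auto &Ctx = CodeGenContext::Get(); // thread-local code-generation context
auto S = Ctx.scoped_environment(); // fresh Environment, active until S leaves scope
Ctx.set_num_simd_lanes(4); // request 4 SIMD lanes (illustrative)
Environment &env = Ctx.env(); // the currently active Environment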
Binds Schema::Identifiers to Expr<T>s.
Definition: WasmUtil.hpp:563
void dump() const
Definition: WasmUtil.cpp:539
void add(Schema::Identifier id, T &&expr)
Adds a mapping from id to expr.
Definition: WasmUtil.hpp:619
std::unordered_map< Schema::Identifier, SQL_t > exprs_
maps Schema::Identifiers to Expr<T>s that evaluate to the current expression
Definition: WasmUtil.hpp:566
SQL_t get(const Schema::Identifier &id) const
Returns the copied entry for identifier id.
Definition: WasmUtil.hpp:699
std::unordered_map< Schema::Identifier, SQL_addr_t > expr_addrs_
maps Schema::Identifiers to Ptr<Expr<T>>s that evaluate to the address of the current expression
Definition: WasmUtil.hpp:568
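A hedged sketch of binding and reading back an entry; id and the compiled expression value are assumptions:
env.add(id, std::move(value)); // bind id to the compiled expression
SQL_t copy = env.get(id); // later: obtain a copy of the bound entry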
Compiles AST expressions m::Expr to Wasm ASTs m::wasm::Expr<T>.
Definition: WasmUtil.hpp:495
void operator()(const ast::ErrorExpr &) override
Definition: WasmUtil.cpp:153
SQL_t compile(const m::ast::Expr &e)
Compiles a m::Expr e of statically unknown type to a SQL_t.
Definition: WasmUtil.hpp:506
void set(SQL_t &&value)
Definition: WasmUtil.hpp:547
const Environment & env_
the environment to use for resolving designators to Expr<T>s
Definition: WasmUtil.hpp:500
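A hedged sketch, assuming an ExprCompiler C (operating over the current Environment) and an AST expression e:
SQL_t v = C.compile(e); // compile to a value of statically unknown SQL type
convert_in_place<int64_t>(v); // e.g. widen an arithmetic result in place, as defined earlier in this file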
A handle to create a Function and to create invocations of that function.
Definition: WasmDSL.hpp:1367
Helper struct for garbage collection done by the Module.
Definition: WasmDSL.hpp:603
C & add_garbage_collected_data(void *handle, Args... args)
Adds and returns an instance of C, to be garbage collected by the Module.
Definition: WasmDSL.hpp:916
static Module & Get()
Definition: WasmDSL.hpp:714
friend struct Allocator
Definition: WasmDSL.hpp:652
std::size_t length() const
Definition: WasmUtil.hpp:81
bool guarantees_terminating_nul() const
Definition: WasmUtil.hpp:83
NChar clone() const
Definition: WasmUtil.hpp:53
bool can_be_null() const
Definition: WasmUtil.hpp:80
Ptr< Charx1 > val()
Definition: WasmUtil.hpp:55
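A hedged sketch of the NChar accessors above, for an NChar s obtained from the current Environment:
if (s.can_be_null()) { /* ... guard the NULL case ... */ }
std::size_t len = s.length(); // declared length in characters
bool has_nul = s.guarantees_terminating_nul(); // trailing NUL byte guaranteed?
NChar copy = s.clone(); // independent copy of the value
Ptr<Charx1> addr = copy.val(); // pointer to the first character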
Proxy to implement loads from a buffer.
Definition: WasmUtil.hpp:1207
Proxy to implement stores to a buffer.
Definition: WasmUtil.hpp:1243
Proxy to implement swaps in a buffer.
Definition: WasmUtil.hpp:1276
void operator()(U32x1 first, U32x1 second)
Swaps tuples with IDs first and second.
Definition: WasmUtil.cpp:2980
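A hedged sketch connecting the swap proxy to a Buffer buf; the tuple IDs i and j are illustrative U32x1 values:
auto swap = buf.create_swap_proxy(); // proxy over the buffer's entire schema
swap(i, j); // swap tuples with IDs i and j in place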
Helper type to deduce the Expr<U> type given a ...
Definition: WasmDSL.hpp:160