mutable — A Database System for Research and Fast Prototyping.
WasmUtil.cpp — generated documentation listing of this source file.
7#include <optional>
8#include <regex>
9#include <tuple>
10
11
12using namespace m;
13using namespace m::storage;
14using namespace m::wasm;
15
16
17namespace {
18
19namespace options {
20
22bool pointer_sharing = true;
23
25bool remainder_removal = true;
26
27}
28
29__attribute__((constructor(201)))
30static void add_wasm_util_args()
31{
32 Catalog &C = Catalog::Get();
33
34 /*----- Command-line arguments -----*/
35 C.arg_parser().add<bool>(
36 /* group= */ "Wasm",
37 /* short= */ nullptr,
38 /* long= */ "--no-pointer-sharing",
39 /* description= */ "do not use pointer sharing optimization for data layout compilation",
40 /* callback= */ [](bool){ options::pointer_sharing = false; }
41 );
42 C.arg_parser().add<bool>(
43 /* group= */ "Wasm",
44 /* short= */ nullptr,
45 /* long= */ "--no-remainder-removal",
46 /* description= */ "do not use remainder removal optimization for data layout compilation",
47 /* callback= */ [](bool){ options::remainder_removal = false; }
48 );
49}
50
51}
52
53
54/*======================================================================================================================
55 * Helper functions
56 *====================================================================================================================*/
57
/** Convert the SQL value held by \p operand to the arithmetic type \p T, in place.
 * Only variant alternatives that actually provide a well-typed `to<T>()` conversion are
 * converted; reaching any other alternative is a caller bug. */
template<arithmetic T>
void convert_in_place(SQL_t &operand)
{
    std::visit(overloaded {
        /* Alternative supports conversion to `T`: convert, then re-seat the variant. */
        [&operand](auto &&actual) -> void requires requires { { actual.template to<T>() } -> sql_type; } {
            auto v = actual.template to<T>();
            operand.~SQL_t();          // destroy the old value before re-initializing in place
            new (&operand) SQL_t(v);   // placement-new the converted value into the same storage
        },
        /* Alternative does not support conversion to `T`. */
        [](auto &actual) -> void requires (not requires { { actual.template to<T>() } -> sql_type; }) {
            M_unreachable("illegal conversion");
        },
        [](std::monostate) -> void { M_unreachable("invalid variant"); },
    }, operand);
}
75
78void convert_in_place(SQL_t &operand, const Numeric *to_type)
79{
80 switch (to_type->kind) {
81 case Numeric::N_Decimal:
82 M_unreachable("currently not supported");
83
84 case Numeric::N_Int:
85 switch (to_type->size()) {
86 default:
87 M_unreachable("invalid integer size");
88 case 8:
89 convert_in_place<int8_t>(operand);
90 return;
91 case 16:
92 convert_in_place<int16_t>(operand);
93 return;
94 case 32:
95 convert_in_place<int32_t>(operand);
96 return;
97 case 64:
98 convert_in_place<int64_t>(operand);
99 return;
100 }
101 break;
102 case Numeric::N_Float:
103 if (to_type->size() <= 32)
104 convert_in_place<float>(operand);
105 else
106 convert_in_place<double>(operand);
107 break;
108 }
109}
110
/** Compile the CNF formula \p cnf into a boolean with \p L SIMD lanes using the expression
 * compiler \p C.  If \p CanBeNull, the result is a nullable `_Bool<L>`, otherwise a `Bool<L>`
 * (predicates are then forced non-NULL via `insist_not_null()`).  An empty CNF compiles to the
 * constant `true`, the neutral element of conjunction. */
template<bool CanBeNull, std::size_t L>
std::conditional_t<CanBeNull, _Bool<L>, Bool<L>> compile_cnf(ExprCompiler &C, const cnf::CNF &cnf)
{
    using result_t = std::conditional_t<CanBeNull, _Bool<L>, Bool<L>>;

    if (cnf.empty())
        return result_t(true);

    /* Accumulators for the conjunction of clauses and the disjunction within one clause. */
    std::optional<result_t> wasm_cnf, wasm_clause;
    for (auto &clause : cnf) {
        wasm_clause.reset();
        for (auto &pred : clause) {
            /* Generate code for the literal of the predicate. */
            M_insist(pred.expr().type()->is_boolean());
            auto compiled = M_CONSTEXPR_COND(CanBeNull, C.compile<_Bool<L>>(pred.expr()),
                                             C.compile<_Bool<L>>(pred.expr()).insist_not_null());
            auto wasm_pred = pred.negative() ? not compiled : compiled;

            /* Add the predicate to the clause with an `or`. */
            if (wasm_clause)
                wasm_clause.emplace(*wasm_clause or wasm_pred);
            else
                wasm_clause.emplace(wasm_pred);
        }
        M_insist(bool(wasm_clause), "empty clause?");

        /* Add the clause to the CNF with an `and`. */
        if (wasm_cnf)
            wasm_cnf.emplace(*wasm_cnf and *wasm_clause);
        else
            wasm_cnf.emplace(*wasm_clause);
    }
    M_insist(bool(wasm_cnf), "empty CNF?");

    return *wasm_cnf;
}
147
148
149/*======================================================================================================================
150 * ExprCompiler
151 *====================================================================================================================*/
152
/* Error expressions must never reach code generation; they are rejected during semantic analysis. */
void ExprCompiler::operator()(const ast::ErrorExpr&) { M_unreachable("no errors at this stage"); }
154
/* NOTE(review): the visitor's signature line is absent from this listing — it was collapsed
 * in the generated documentation page; confirm against the repository source. */
{
    if (e.type()->is_none()) { // create NULL
        /* NOTE(review): the enclosing `switch` header (presumably on the number of SIMD
         * lanes) is absent from this listing. */
        default: M_unreachable("invalid number of SIMD lanes");
        case 1: set(_I32x1::Null()); break;
        case 2: set(_I32x2::Null()); break;
        case 4: set(_I32x4::Null()); break;
        case 8: set(_I32x8::Null()); break;
        case 16: set(_I32x16::Null()); break;
        case 32: set(_I32x32::Null()); break;
        }
        return;
    }

    /* Search with fully qualified name. */
    /* NOTE(review): the declaration of `id` is absent from this listing. */
    set(env_.get(id));
}
174
/* NOTE(review): the visitor's signature line is absent from this listing — it was collapsed
 * in the generated documentation page; confirm against the repository source. */
{
    if (e.type()->is_none()) { // create NULL
        /* NOTE(review): the enclosing `switch` header (presumably on the number of SIMD
         * lanes) is absent from this listing. */
        default: M_unreachable("invalid number of SIMD lanes");
        case 1: set(_I32x1::Null()); break;
        case 2: set(_I32x2::Null()); break;
        case 4: set(_I32x4::Null()); break;
        case 8: set(_I32x8::Null()); break;
        case 16: set(_I32x16::Null()); break;
        case 32: set(_I32x32::Null()); break;
        }
        return;
    }

    /* Interpret constant. */
    auto value = Interpreter::eval(e);

    /* Materialize the interpreted constant as a literal with `L` SIMD lanes, dispatching on
     * the constant's SQL type. */
    auto set_constant = [this, &e, &value]<std::size_t L>(){
        auto set_helper = overloaded {
            [this]<sql_type T>(T &&actual) { this->set(std::forward<T>(actual)); },
            [](auto&&) { M_unreachable("not a SQL type"); }
        };

        /* NOTE(review): the head of a `visit(overloaded {` call over `*e.type()` appears to be
         * absent from this listing. */
        [&value, &set_helper](const Boolean&) { set_helper(_Bool<L>(value.as_b())); },
        [&value, &set_helper](const Numeric &n) {
            switch (n.kind) {
                case Numeric::N_Int:
                case Numeric::N_Decimal:
                    /* Integers and decimals share the integral representation of `n.size()` bits. */
                    switch (n.size()) {
                        default:
                            M_unreachable("invalid integer size");
                        case 8:
                            set_helper(_I8<L>(value.as_i()));
                            break;
                        case 16:
                            set_helper(_I16<L>(value.as_i()));
                            break;
                        case 32:
                            set_helper(_I32<L>(value.as_i()));
                            break;
                        case 64:
                            set_helper(_I64<L>(value.as_i()));
                            break;
                    }
                    break;
                case Numeric::N_Float:
                    if (n.size() <= 32)
                        set_helper(_Float<L>(value.as_f()));
                    else
                        set_helper(_Double<L>(value.as_d()));
            }
        },
        [this, &value](const CharacterSequence&) {
            M_insist(L == 1, "string SIMDfication currently not supported");
            /* Strings are materialized as the address of the interned literal. */
            set(CodeGenContext::Get().get_literal_address(value.as<const char*>()));
        },
        /* Dates and datetimes are stored as 32-/64-bit integers respectively. */
        [&value, &set_helper](const Date&) { set_helper(_I32<L>(value.as_i())); },
        [&value, &set_helper](const DateTime&) { set_helper(_I64<L>(value.as_i())); },
        [](const NoneType&) { M_unreachable("should've been handled earlier"); },
        [](auto&&) { M_unreachable("invalid type for given number of SIMD lanes"); },
        }, *e.type());
    };
    /* NOTE(review): the `switch` header (presumably on the number of SIMD lanes) is absent
     * from this listing. */
    default: M_unreachable("invalid number of SIMD lanes");
    case 1: set_constant.operator()<1>(); break;
    case 2: set_constant.operator()<2>(); break;
    case 4: set_constant.operator()<4>(); break;
    case 8: set_constant.operator()<8>(); break;
    case 16: set_constant.operator()<16>(); break;
    case 32: set_constant.operator()<32>(); break;
    }
}
249
/* NOTE(review): the visitor's signature line is absent from this listing — it was collapsed
 * in the generated documentation page; confirm against the repository source. */
{
    /* This is a helper to apply unary operations to `Expr<T>`s. It uses SFINAE within `overloaded` to only apply the
     * operation if it is well typed, e.g. `+42` is ok whereas `+true` is not. */
    auto apply_unop = [this, &e](auto unop) {
        (*this)(*e.expr);  // compile the operand first; its result is fetched via `get()`
        std::visit(overloaded {
            [](std::monostate&&) -> void { M_unreachable("illegal value"); },
            /* Well-typed alternative: apply the operation and store the result. */
            [this, &unop](auto &&expr) -> void requires requires { { unop(expr) } -> sql_type; } {
                this->set(unop(expr));
            },
            /* Ill-typed alternative: applying the operation here is a bug. */
            [](auto &&expr) -> void requires (not requires { { unop(expr) } -> sql_type; }) {
                M_unreachable("illegal operation");
            },
        }, get());
    };

/* Wrap the unary operator in a generic lambda so `apply_unop` can probe it per alternative. */
#define UNOP(OP) apply_unop(overloaded { \
    [](auto &&expr) -> decltype(expr.operator OP()) { return expr.operator OP(); }, \
}); \
break

    switch (e.op().type) {
        default:
            M_unreachable("invalid operator");

        case TK_PLUS: UNOP(+);
        case TK_MINUS: UNOP(-);
        case TK_TILDE: UNOP(~);
        case TK_Not: UNOP(not);
    }
#undef UNOP
}
283
/* NOTE(review): the visitor's signature line is absent from this listing — it was collapsed
 * in the generated documentation page; confirm against the repository source. */
{
    /* This is a helper to apply binary operations to `Expr<T>`s. It uses SFINAE within `overloaded` to only apply the
     * operation if it is well typed, e.g. `42 + 13` is ok whereas `true + 42` is not. */
    auto apply_binop = [this, &e](auto binop) {
        (*this)(*e.lhs);
        SQL_t lhs = get();

        (*this)(*e.rhs);
        SQL_t rhs = get();

        /* Bring both operands to the common operand type, if one was computed. */
        if (e.common_operand_type) {
            convert_in_place(lhs, e.common_operand_type); // convert in-place
            convert_in_place(rhs, e.common_operand_type); // convert in-place
        }

        /* Nested visitation: outer over the LHS alternative, inner over the RHS alternative. */
        std::visit(overloaded {
            [](std::monostate&&) -> void { M_unreachable("illegal value"); },
            [this, &binop, &rhs](auto &&expr_lhs) -> void {
                std::visit(overloaded {
                    [](std::monostate&&) -> void { M_unreachable("illegal value"); },
                    /* Well-typed pair: apply the operation and store the result. */
                    [this, expr_lhs, &binop](auto &&expr_rhs) mutable -> void
                        requires requires { { binop(expr_lhs, expr_rhs) } -> sql_type; } {
                        this->set(binop(expr_lhs, expr_rhs));
                    },
                    [](auto &&expr_rhs) -> void
                        requires (not requires { { binop(expr_lhs, expr_rhs) } -> sql_type; }) {
                        M_unreachable("illegal operation");
                    },
                }, rhs);
            },
        }, lhs);
    };

/* Wrap the binary operator in a lambda so `apply_binop` can probe it per alternative pair. */
#define BINOP(OP) apply_binop( \
    [](auto lhs, auto rhs) -> decltype(lhs.operator OP(rhs)) { return lhs.operator OP(rhs); } \
); break
/* Comparison operator: strings are compared via `strcmp` with the given ordering, everything
 * else via the plain binary operator. */
#define CMPOP(OP, STRCMP_OP) { \
    if (e.lhs->type()->is_character_sequence()) { \
        M_insist(e.rhs->type()->is_character_sequence()); \
        M_insist(CodeGenContext::Get().num_simd_lanes() == 1, "invalid number of SIMD lanes"); \
        apply_binop( \
            [](NChar lhs, NChar rhs) -> _Boolx1 { \
                return strcmp(lhs, rhs, STRCMP_OP); \
            } \
        ); break; \
    } else { \
        BINOP(OP); \
    } \
}

    switch (e.op().type) {
        default:
            M_unreachable("illegal token type");

        /*----- Arithmetic operations --------------------------------------------------------------------------------*/
        case TK_PLUS: BINOP(+);
        case TK_MINUS: BINOP(-);
        case TK_ASTERISK: BINOP(*);
        case TK_SLASH: BINOP(/);
        case TK_PERCENT: BINOP(%);

        /*----- Comparison operations --------------------------------------------------------------------------------*/
        case TK_EQUAL: CMPOP(==, EQ);
        case TK_BANG_EQUAL: CMPOP(!=, NE);
        case TK_LESS: CMPOP(<, LT);
        case TK_LESS_EQUAL: CMPOP(<=, LE);
        case TK_GREATER: CMPOP(>, GT);
        case TK_GREATER_EQUAL: CMPOP(>=, GE);

        /*----- CharacterSequence operations -------------------------------------------------------------------------*/
        case TK_Like: {
            M_insist(e.lhs->type()->is_character_sequence());
            M_insist(e.rhs->type()->is_character_sequence());
            M_insist(CodeGenContext::Get().num_simd_lanes() == 1, "invalid number of SIMD lanes");
            (*this)(*e.lhs);
            NChar str = get<NChar>();
            if (auto static_pattern = cast<ast::Constant>(e.rhs.get())) { // check whether specialization is applicable
                auto pattern = Catalog::Get().pool(
                    interpret(*static_pattern->tok.text.assert_not_none()) // interpret pattern to handle escaped chars
                );
                /* Specialize `%foo%`, `foo%`, and `%foo` patterns to cheaper string scans. */
                if (std::regex_match(*pattern, std::regex("%[^_%\\\\]+%"))) { // contains expression
                    set(like_contains(str, pattern));
                    break;
                }
                if (std::regex_match(*pattern, std::regex("[^_%\\\\]+%"))) { // prefix expression
                    set(like_prefix(str, pattern));
                    break;
                }
                if (std::regex_match(*pattern, std::regex("%[^_%\\\\]+"))) { // suffix expression
                    set(like_suffix(str, pattern));
                    break;
                }
            }
            /* no specialization applicable, fallback to general dynamic programming approach */
            (*this)(*e.rhs);
            NChar pattern = get<NChar>();
            set(like(str, pattern));
            break;
        }

        case TK_DOTDOT: { // string concatenation
            M_insist(e.lhs->type()->is_character_sequence());
            M_insist(e.rhs->type()->is_character_sequence());
            M_insist(CodeGenContext::Get().num_simd_lanes() == 1, "invalid number of SIMD lanes");
            (*this)(*e.lhs);
            NChar lhs = get<NChar>();
            (*this)(*e.rhs);
            NChar rhs = get<NChar>();

            M_insist(e.lhs->can_be_null() == lhs.can_be_null());
            M_insist(e.rhs->can_be_null() == rhs.can_be_null());

            Var<Ptr<Charx1>> res; // always set here
            bool res_can_be_null = lhs.can_be_null() or rhs.can_be_null();
            std::size_t res_length = lhs.length() + rhs.length() + 1; // allocate space for terminating NUL byte

            if (res_can_be_null) {
                auto [_ptr_lhs, is_nullptr_lhs] = lhs.split();
                auto [_ptr_rhs, is_nullptr_rhs] = rhs.split();
                Ptr<Charx1> ptr_lhs(_ptr_lhs), ptr_rhs(_ptr_rhs); // since structured bindings cannot be used in lambda capture

                IF (is_nullptr_lhs or is_nullptr_rhs) {
                /* NOTE(review): a statement is absent from this listing here (collapsed in the
                 * generated documentation) — presumably setting `res` for the NULL case; confirm
                 * against the repository source. */
                } ELSE {
                    res = Module::Allocator().pre_malloc<char>(res_length); // create pre-allocation for result
                    Var<Ptr<Charx1>> ptr(strncpy(res, ptr_lhs, U32x1(lhs.length()))); // since res must not be changed
                    strncpy(ptr, ptr_rhs, U32x1(rhs.size_in_bytes())).discard(); // copy with possible terminating NUL byte
                    if (not rhs.guarantees_terminating_nul())
                        *ptr = '\0'; // terminate with NUL byte
                };
            } else {
                res = Module::Allocator().pre_malloc<char>(res_length); // create pre-allocation for result
                Var<Ptr<Charx1>> ptr(strncpy(res, lhs, U32x1(lhs.length()))); // since res must not be changed
                strncpy(ptr, rhs, U32x1(rhs.size_in_bytes())).discard(); // copy with possible terminating NUL byte
                if (not rhs.guarantees_terminating_nul())
                    *ptr = '\0'; // terminate with NUL byte
            }

            set(SQL_t(NChar(res, res_can_be_null, res_length, /* guarantees_terminating_nul= */ true)));
            break;
        }

        /*----- Logical operations -----------------------------------------------------------------------------------*/
        case TK_And:
        case TK_Or: {
            M_insist(e.lhs->type()->is_boolean());
            M_insist(e.rhs->type()->is_boolean());

            (*this)(*e.lhs);
            _Boolx1 lhs = get<_Boolx1>();
            (*this)(*e.rhs);
            _Boolx1 rhs = get<_Boolx1>();

            if (e.op().type == TK_And)
                set(lhs and rhs);
            else
                set(lhs or rhs);

            break;
        }
    }
#undef CMPOP
#undef BINOP
}
449
/* NOTE(review): the visitor's signature line is absent from this listing — it was collapsed
 * in the generated documentation page; confirm against the repository source. */
{
    switch (e.get_function().fnid) {
        default:
            M_unreachable("function kind not implemented");

        /* NOTE(review): a `case` label (presumably for user-defined functions) is absent from
         * this listing. */
            M_unreachable("UDFs not yet supported");

        /*----- NULL check -------------------------------------------------------------------------------------------*/
        case m::Function::FN_ISNULL: {
            (*this)(*e.args[0]);
            auto arg = get();
            std::visit(overloaded { // do not use constraint `is_sql_type` since `is_null()` returns a `PrimitiveExpr`
                [this]<sql_type T>(T actual) -> void requires requires { SQL_t(actual.is_null()); } {
                    set(actual.is_null());
                },
                []<sql_type T>(T actual) -> void requires (not requires { SQL_t(actual.is_null()); }) {
                    M_unreachable("NULL check not supported");
                },
                [](std::monostate) -> void { M_unreachable("invalid variant"); },
            }, arg);
            break;
        }

        /*----- Type cast --------------------------------------------------------------------------------------------*/
        case m::Function::FN_INT: {
            (*this)(*e.args[0]);
            auto arg = get();
            convert_in_place<int32_t>(arg);  // cast to 32-bit integer in place
            set(std::move(arg));
            break;
        }

        /*----- Aggregate functions ----------------------------------------------------------------------------------*/
        /* Aggregates are computed elsewhere; look up the pre-computed result in the codegen
         * environment under the aggregate expression's textual identifier. */
        case m::Function::FN_COUNT:
        case m::Function::FN_MIN:
        case m::Function::FN_MAX:
        case m::Function::FN_SUM:
        case m::Function::FN_AVG: {
            std::ostringstream oss;
            oss << e;
            Schema::Identifier id(Catalog::Get().pool(oss.str().c_str()));
            set(env_.get(id));
        }
    }
}
497
/* NOTE(review): the visitor's signature line is absent from this listing — it was collapsed
 * in the generated documentation page; confirm against the repository source. */
{
    /* Search with fully qualified name. */
    /* NOTE(review): the declaration of `id` is absent from this listing. */
    set(env_.get(id));
}
504
/* NOTE(review): the function's signature and the `switch` header (presumably on the number
 * of SIMD lanes) are absent from this listing — collapsed in the generated documentation
 * page; confirm against the repository source.  Dispatches to `compile_cnf` with the
 * nullability and lane count resolved at runtime. */
{
    default: M_unreachable("invalid number of SIMD lanes");
    case 1: return cnf.can_be_null() ? compile_cnf<true, 1>(*this, cnf) : compile_cnf<false, 1>(*this, cnf);
    case 16: return cnf.can_be_null() ? compile_cnf<true, 16>(*this, cnf) : compile_cnf<false, 16>(*this, cnf);
    case 32: return cnf.can_be_null() ? compile_cnf<true, 32>(*this, cnf) : compile_cnf<false, 32>(*this, cnf);
    }
}
514
515
516
517/*======================================================================================================================
518 * Environment
519 *====================================================================================================================*/
520
522void Environment::dump(std::ostream &out) const
523{
524 out << "WasmEnvironment\n` entries: { ";
525 for (auto it = exprs_.begin(), end = exprs_.end(); it != end; ++it) {
526 if (it != exprs_.begin()) out << ", ";
527 out << it->first;
528 }
529 out << " }" << std::endl;
530
531 out << "WasmEnvironment\n` address entries: { ";
532 for (auto it = expr_addrs_.begin(), end = expr_addrs_.end(); it != end; ++it) {
533 if (it != expr_addrs_.begin()) out << ", ";
534 out << it->first;
535 }
536 out << " }" << std::endl;
537}
538
/* Convenience overload: dump to `stderr`. */
void Environment::dump() const { dump(std::cerr); }
541
542
543/*======================================================================================================================
544 * CodeGenContext
545 *====================================================================================================================*/
546
/* Definition of the thread-local singleton instance of the code generation context. */
thread_local std::unique_ptr<CodeGenContext> CodeGenContext::the_context_;
548
549
550/*======================================================================================================================
551 * compile data layout
552 *====================================================================================================================*/
553
554namespace m {
555
556namespace wasm {
557
572template<bool IsStore, std::size_t L, bool SinglePass, bool PointerSharing, VariableKind Kind>
573requires (L > 0) and (is_pow_2(L))
574std::tuple<Block, Block, Block>
575compile_data_layout_sequential(const Schema &_tuple_value_schema, const Schema &_tuple_addr_schema,
576 Ptr<void> base_address, const storage::DataLayout &layout, const Schema &layout_schema,
578{
579 const auto tuple_value_schema = _tuple_value_schema.deduplicate().drop_constants();
580 const auto tuple_addr_schema = _tuple_addr_schema.deduplicate().drop_constants();
581
582 M_insist(tuple_value_schema.num_entries() != 0, "sequential access must access at least one tuple schema entry");
583 M_insist(not IsStore or tuple_addr_schema.num_entries() == 0, "addresses are only computed for loads");
584#ifndef NDEBUG
585 for (auto &e : tuple_value_schema)
586 M_insist(layout_schema.find(e.id) != layout_schema.cend(), "tuple value schema entry not found");
587 for (auto &e : tuple_addr_schema) {
588 auto it = layout_schema.find(e.id);
589 M_insist(it != layout_schema.cend(), "tuple address schema entry not found");
590 M_insist(not it->nullable(), "nullable tuple address schema entry not yet supported");
591 M_insist(not it->type->is_boolean(), "boolean tuple address schema entry not yet supported");
592 M_insist(not it->type->is_character_sequence(), "character sequence tuple address schema entry omitted");
593 }
594#endif
595
598 Block inits("inits", false), stores("stores", false), loads("loads", false), jumps("jumps", false);
600 SQL_t values[tuple_value_schema.num_entries()];
602 SQL_addr_t *addrs;
603 if (not tuple_addr_schema.empty())
604 addrs = static_cast<SQL_addr_t*>(alloca(sizeof(SQL_addr_t) * tuple_addr_schema.num_entries()));
606 Bool<L> *null_bits;
607 if constexpr (not IsStore)
608 null_bits = static_cast<Bool<L>*>(alloca(sizeof(Bool<L>) * tuple_value_schema.num_entries()));
609
610 using key_t = std::pair<uint8_t, uint64_t>;
612 using ptr_t = std::conditional_t<SinglePass, Var<Ptr<void>>, Global<Ptr<void>>>;
614 using mask_t = std::conditional_t<SinglePass, Var<U32x1>, Global<U32x1>>;
615 struct value_t
616 {
617 ptr_t ptr;
618 std::optional<mask_t> mask;
619 };
623 std::conditional_t<
624 PointerSharing, std::unordered_map<key_t, value_t>, std::vector<std::pair<key_t, value_t>>
625 > loading_context;
626
627 auto &env = CodeGenContext::Get().env(); // the current codegen environment
628
629 if constexpr (L > 1) {
630 BLOCK_OPEN(inits) {
631 Wasm_insist(tuple_id % uint64_t(L) == 0U, "must start at a tuple ID beginning a SIMD batch");
632 }
633 }
634
635 /*----- Check whether any of the entries in `tuple_value_schema` can be NULL, so that we need the NULL bitmap. -----*/
636 const bool needs_null_bitmap = [&]() {
637 for (auto &tuple_entry : tuple_value_schema) {
638 if (layout_schema[tuple_entry.id].second.nullable())
639 return true; // found an entry in `tuple_value_schema` that can be NULL according to `layout_schema`
640 }
641 return false; // no attribute in `tuple_value_schema` can be NULL according to `layout_schema`
642 }();
643 bool has_null_bitmap = false; // indicates whether the data layout specifies a NULL bitmap
644
645 /*----- If predication is used, introduce predication variable and update it before storing a tuple. -----*/
646 const bool is_predicated = env.predicated();
647 M_insist(not is_predicated or (IsStore and L == 1), "predication only supported for storing scalar tuples");
648 std::optional<Var<Boolx1>> pred;
649 if (is_predicated) {
650 BLOCK_OPEN(stores) {
651 pred = env.extract_predicate<_Boolx1>().is_true_and_not_null();
652 }
653 }
654
655 /*----- Increment tuple ID before advancing to the next tuple pack. -----*/
656 if constexpr (IsStore) {
657 BLOCK_OPEN(jumps) {
658 if (is_predicated) {
659 M_insist(L == 1);
660 M_insist(bool(pred));
661 tuple_id += pred->to<uint64_t>();
662 } else {
663 tuple_id += uint64_t(L);
664 }
665 }
666 } else {
667 BLOCK_OPEN(jumps) {
668 tuple_id += uint64_t(L);
669 }
670 }
671
672 /*----- Visit the data layout. -----*/
673 layout.for_sibling_leaves(
674 [&, &inits=inits, &jumps=jumps, &stores=stores, &loads=loads] // explicitly capture references non-const
675 (const std::vector<DataLayout::leaf_info_t> &leaves, const DataLayout::level_info_stack_t &levels,
676 uint64_t inode_offset_in_bits)
677 {
678 /*----- Clear the per-leaf data structure. -----*/
679 loading_context.clear();
680
681 /*----- Remember whether and where we found the NULL bitmap. -----*/
682 std::optional<ptr_t> null_bitmap_ptr;
683 std::optional<mask_t> null_bitmap_mask;
684 uint8_t null_bitmap_bit_offset;
685 uint64_t null_bitmap_stride_in_bits;
686
687 /*----- Compute INode offset in bytes and INode iteration depending on the given tuple ID. -----*/
688 auto compute_additional_inode_byte_offset = [&](U64x1 tuple_id) -> U64x1 {
689 auto rec = [&](U64x1 curr_tuple_id, decltype(levels.cbegin()) curr, const decltype(levels.cend()) end,
690 auto rec) -> U64x1
691 {
692 if (curr == end) {
693 Wasm_insist(curr_tuple_id == tuple_id % uint64_t(levels.back().num_tuples));
694 return U64x1(0);
695 }
696
697 if (is_pow_2(curr->num_tuples)) {
698 U64x1 child_iter = curr_tuple_id.clone() >> uint64_t(__builtin_ctzl(curr->num_tuples));
699 U64x1 inner_tuple_id = curr_tuple_id bitand uint64_t(curr->num_tuples - 1U);
700 M_insist(curr->stride_in_bits % 8 == 0, "INode stride must be byte aligned");
701 U64x1 offset_in_bytes = child_iter * uint64_t(curr->stride_in_bits / 8);
702 return offset_in_bytes + rec(inner_tuple_id, std::next(curr), end, rec);
703 } else {
704 U64x1 child_iter = curr_tuple_id.clone() / uint64_t(curr->num_tuples);
705 U64x1 inner_tuple_id = curr_tuple_id % uint64_t(curr->num_tuples);
706 M_insist(curr->stride_in_bits % 8 == 0, "INode stride must be byte aligned");
707 U64x1 offset_in_bytes = child_iter * uint64_t(curr->stride_in_bits / 8);
708 return offset_in_bytes + rec(inner_tuple_id, std::next(curr), end, rec);
709 }
710 };
711 return rec(tuple_id.clone(), levels.cbegin(), levels.cend(), rec);
712 };
713 std::optional<const Var<I64x1>> inode_byte_offset;
714 std::optional<const Var<U64x1>> inode_iter;
715 BLOCK_OPEN(inits) {
716 M_insist(inode_offset_in_bits % 8 == 0, "INode offset must be byte aligned");
717 inode_byte_offset.emplace(
718 int64_t(inode_offset_in_bits / 8) + compute_additional_inode_byte_offset(tuple_id).make_signed()
719 );
720 M_insist(levels.back().num_tuples != 0, "INode must be large enough for at least one tuple");
721 if (levels.back().num_tuples != 1) {
722 inode_iter.emplace(
723 is_pow_2(levels.back().num_tuples) ? tuple_id bitand uint64_t(levels.back().num_tuples - 1U)
724 : tuple_id % uint64_t(levels.back().num_tuples)
725 );
726 } else {
727 /* omit computation of INode iteration since it is always the first iteration, i.e. equals 0 */
728 }
729 };
730
731 /*----- Iterate over sibling leaves, i.e. leaf children of a common parent INode, to emit code. -----*/
732 for (auto &leaf_info : leaves) {
733 const uint8_t bit_stride = leaf_info.stride_in_bits % 8; // need byte stride later for the stride jumps
734
735 if (leaf_info.leaf.index() == layout_schema.num_entries()) { // NULL bitmap
736 if (not needs_null_bitmap)
737 continue;
738
740 M_insist(not has_null_bitmap, "at most one bitmap may be specified");
741 has_null_bitmap = true;
742 if (bit_stride) { // NULL bitmap with bit stride requires dynamic masking
743 M_insist(L == 1, "SIMDfied loading of NULL bitmap with bit stride currently not supported");
744
745 M_insist(bool(inode_iter), "stride requires repetition");
746 U64x1 leaf_offset_in_bits = leaf_info.offset_in_bits + *inode_iter * leaf_info.stride_in_bits;
747 U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>() ; // mod 8
748 I64x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed(); // div 8
749
750 null_bitmap_bit_offset = leaf_info.offset_in_bits % 8;
751 null_bitmap_stride_in_bits = leaf_info.stride_in_bits;
752 BLOCK_OPEN(inits) {
753 /*----- Initialize pointer and mask. -----*/
754 null_bitmap_ptr.emplace(); // default-construct for globals to be able to use assignment below
755 *null_bitmap_ptr = base_address.clone() + *inode_byte_offset + leaf_byte_offset;
756 null_bitmap_mask.emplace(); // default-construct for globals to be able to use assignment below
757 *null_bitmap_mask = 1U << leaf_bit_offset;
758 }
759
760 /*----- Iterate over layout entries in *ascending* order. -----*/
761 std::size_t prev_layout_idx = 0;
762 for (std::size_t layout_idx = 0; layout_idx < layout_schema.num_entries(); ++layout_idx) {
763 auto &layout_entry = layout_schema[layout_idx];
764 if (layout_entry.nullable()) { // layout entry may be NULL
765 auto tuple_it = tuple_value_schema.find(layout_entry.id);
766 if (tuple_it == tuple_value_schema.end())
767 continue; // entry not contained in tuple schema
768 M_insist(prev_layout_idx == 0 or layout_idx > prev_layout_idx,
769 "layout entries not processed in ascending order");
770 M_insist(*tuple_it->type == *layout_entry.type);
771 const auto delta = layout_idx - prev_layout_idx;
772 const uint8_t bit_delta = delta % 8;
773 const int64_t byte_delta = delta / 8;
774
775 auto advance_to_next_bit = [&]() {
776 if (bit_delta) {
777 if (is_predicated) {
778 M_insist(bool(pred));
779 *null_bitmap_mask <<=
780 Select(*pred, bit_delta, uint8_t(0)); // possibly advance mask
781 } else {
782 *null_bitmap_mask <<= bit_delta; // advance mask
783 }
784 /* If the mask surpasses the first byte, advance pointer to the next byte... */
785 *null_bitmap_ptr += (*null_bitmap_mask bitand 0xffU).eqz().template to<int64_t>();
786 /* ... and remove lowest byte from the mask. */
787 *null_bitmap_mask = Select((*null_bitmap_mask bitand 0xffU).eqz(),
788 *null_bitmap_mask >> 8U, *null_bitmap_mask);
789 }
790 if (byte_delta) {
791 if (is_predicated) {
792 M_insist(bool(pred));
793 *null_bitmap_ptr +=
794 Select(*pred, byte_delta, 0); // possibly advance pointer
795 } else {
796 *null_bitmap_ptr += byte_delta; // advance pointer
797 }
798 }
799 };
800
801 if constexpr (IsStore) {
802 /*----- Store NULL bit depending on its type. -----*/
803 auto store = [&]<typename T>() {
804 BLOCK_OPEN(stores) {
805 advance_to_next_bit();
806
807 auto [value, is_null] = env.get<T>(tuple_it->id).split(); // get value
808 value.discard(); // handled at entry leaf
809 setbit(null_bitmap_ptr->template to<uint8_t*>(), is_null,
810 null_bitmap_mask->template to<uint8_t>()); // update bit
811 }
812 };
814 [&](const Boolean&) { store.template operator()<_Boolx1>(); },
815 [&](const Numeric &n) {
816 switch (n.kind) {
817 case Numeric::N_Int:
818 case Numeric::N_Decimal:
819 switch (n.size()) {
820 default: M_unreachable("invalid size");
821 case 8: store.template operator()<_I8x1 >(); break;
822 case 16: store.template operator()<_I16x1>(); break;
823 case 32: store.template operator()<_I32x1>(); break;
824 case 64: store.template operator()<_I64x1>(); break;
825 }
826 break;
827 case Numeric::N_Float:
828 if (n.size() <= 32)
829 store.template operator()<_Floatx1>();
830 else
831 store.template operator()<_Doublex1>();
832 }
833 },
834 [&](const CharacterSequence&) {
835 BLOCK_OPEN(stores) {
836 advance_to_next_bit();
837
838 auto value = env.get<NChar>(tuple_it->id); // get value
839 setbit(null_bitmap_ptr->template to<uint8_t*>(), value.is_null(),
840 null_bitmap_mask->template to<uint8_t>()); // update bit
841 }
842 },
843 [&](const Date&) { store.template operator()<_I32x1>(); },
844 [&](const DateTime&) { store.template operator()<_I64x1>(); },
845 [](auto&&) { M_unreachable("invalid type"); },
846 }, *tuple_it->type);
847 } else {
848 const auto tuple_idx = std::distance(tuple_value_schema.begin(), tuple_it);
849 BLOCK_OPEN(loads) {
850 advance_to_next_bit();
851
852 U8x1 byte = *null_bitmap_ptr->template to<uint8_t*>(); // load the byte
854 (byte bitand *null_bitmap_mask).template to<bool>()
855 ); // mask bit with dynamic mask
856 new (&null_bits[tuple_idx]) Boolx1(value);
857 /* Address for NULL bits not yet supported. */
858 }
859 }
860
861 prev_layout_idx = layout_idx;
862 } else { // layout entry must not be NULL
863#ifndef NDEBUG
864 if constexpr (IsStore) {
865 /*----- Check that value is also not NULL. -----*/
866 auto check = overloaded{
867 [&]<sql_type T>() {
868 BLOCK_OPEN(stores) {
869 Wasm_insist(env.get<T>(layout_entry.id).not_null(),
870 "value of non-nullable entry must not be nullable");
871 }
872 },
873 []<typename>() {
874 M_unreachable("invalid type for given number of SIMD lanes");
875 }
876 };
878 [&](const Boolean&) { check.template operator()<_Bool<L>>(); },
879 [&](const Numeric &n) {
880 switch (n.kind) {
881 case Numeric::N_Int:
882 case Numeric::N_Decimal:
883 switch (n.size()) {
884 default: M_unreachable("invalid size");
885 case 8: check.template operator()<_I8 <L>>(); break;
886 case 16: check.template operator()<_I16<L>>(); break;
887 case 32: check.template operator()<_I32<L>>(); break;
888 case 64: check.template operator()<_I64<L>>(); break;
889 }
890 break;
891 case Numeric::N_Float:
892 if (n.size() <= 32)
893 check.template operator()<_Float<L>>();
894 else
895 check.template operator()<_Double<L>>();
896 }
897 },
898 [&](const CharacterSequence&) { check.template operator()<NChar>(); },
899 [&](const Date&) { check.template operator()<_I32<L>>(); },
900 [&](const DateTime&) { check.template operator()<_I64<L>>(); },
901 [](auto&&) { M_unreachable("invalid type"); },
902 }, *layout_entry.type);
903 }
904#endif
905 }
906 }
907
908 /*----- Final advancement of the pointer and mask to match the leaf's stride. -----*/
909 /* This is done here (and not together with the other stride jumps further below) since we only need
910 * to advance by `delta` bits since we already have advanced by `prev_layout_idx` bits. */
911 const auto delta = leaf_info.stride_in_bits - prev_layout_idx;
912 const uint8_t bit_delta = delta % 8;
913 const int64_t byte_delta = delta / 8;
914 if (bit_delta) {
915 BLOCK_OPEN(jumps) {
916 if (is_predicated) {
917 M_insist(bool(pred));
918 *null_bitmap_mask <<= Select(*pred, bit_delta, uint8_t(0)); // possibly advance mask
919 } else {
920 *null_bitmap_mask <<= bit_delta; // advance mask
921 }
922 /* If the mask surpasses the first byte, advance pointer to the next byte... */
923 *null_bitmap_ptr += (*null_bitmap_mask bitand 0xffU).eqz().template to<int64_t>();
924 /* ... and remove the lowest byte from the mask. */
925 *null_bitmap_mask = Select((*null_bitmap_mask bitand 0xffU).eqz(),
926 *null_bitmap_mask >> 8U, *null_bitmap_mask);
927 }
928 }
929 if (byte_delta) {
930 BLOCK_OPEN(jumps) {
931 if (is_predicated) {
932 M_insist(bool(pred));
933 *null_bitmap_ptr += Select(*pred, byte_delta, 0); // possibly advance pointer
934 } else {
935 *null_bitmap_ptr += byte_delta; // advance pointer
936 }
937 }
938 }
939 } else { // NULL bitmap without bit stride can benefit from static masking of NULL bits
940 M_insist(L == 1 or L >= 16,
941 "NULL bits must fill at least an entire SIMD vector when loading SIMDfied");
942 M_insist(L == 1 or tuple_value_schema.num_entries() <= 64,
943 "bytes containing a NULL bitmap must fit into scalar value when loading SIMDfied");
944 M_insist(L == 1 or
945 std::max(ceil_to_pow_2(tuple_value_schema.num_entries()), 8UL) == leaf_info.stride_in_bits,
946 "NULL bitmaps must be packed s.t. the distance between two NULL bits of a single "
947 "attribute is a power of 2 when loading SIMDfied");
948 M_insist(L == 1 or leaf_info.offset_in_bits % 8 == 0,
949 "NULL bitmaps must not start with bit offset when loading SIMDfied");
950
951 auto byte_offset = [&]() -> I64x1 {
952 if (inode_iter and leaf_info.stride_in_bits) {
953 /* omit `leaf_info.offset_in_bits` here to add it to the static offsets and masks;
954 * this is valid since no bit stride means that the leaf byte offset computation is
955 * independent of the static parts */
956 U64x1 leaf_offset_in_bits = *inode_iter * leaf_info.stride_in_bits;
957 U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
958 I64x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed(); // div 8
959 BLOCK_OPEN(inits) {
960 Wasm_insist(leaf_bit_offset == 0U, "no leaf bit offset without bit stride");
961 }
962 return *inode_byte_offset + leaf_byte_offset;
963 } else {
964 return *inode_byte_offset;
965 }
966 }();
967
968 key_t key(leaf_info.offset_in_bits % 8, leaf_info.stride_in_bits);
969 auto [it, inserted] =
970 M_CONSTEXPR_COND(PointerSharing,
971 loading_context.try_emplace(std::move(key)),
972 std::make_pair(loading_context.emplace(loading_context.end(), std::move(key), value_t()), true));
973 if (inserted) {
974 BLOCK_OPEN(inits) {
975 it->second.ptr = base_address.clone() + byte_offset;
976 }
977 } else {
978 byte_offset.discard();
979 }
980 const auto &ptr = it->second.ptr;
981
983 std::unordered_map<int64_t, Var<U8x1>> loaded_bytes;
984
985 using bytes_t = std::variant<std::monostate, Var<U8<L>>, Var<U16<L>>, Var<U32<L>>, Var<U64<L>>>;
987 bytes_t bytes;
988 if constexpr (not IsStore and L > 1) {
989 auto emplace = [&]<typename T>() {
990 using type = typename T::type;
991 static constexpr std::size_t lanes = T::num_simd_lanes;
992 BLOCK_OPEN(loads) {
993 bytes.template emplace<Var<T>>(
994 *(ptr + leaf_info.offset_in_bits / 8).template to<type*, lanes>()
995 );
996 }
997 };
998 switch (ceil_to_pow_2(tuple_value_schema.num_entries())) {
999 default: emplace.template operator()<U8 <L>>(); break; // <= 8
1000 case 16: emplace.template operator()<U16<L>>(); break;
1001 case 32: emplace.template operator()<U32<L>>(); break;
1002 case 64: emplace.template operator()<U64<L>>(); break;
1003 }
1004 }
1005
1006 /*----- For each tuple entry that can be NULL, create a store/load with static offset and mask. --*/
1007 for (std::size_t tuple_idx = 0; tuple_idx != tuple_value_schema.num_entries(); ++tuple_idx) {
1008 auto &tuple_entry = tuple_value_schema[tuple_idx];
1009 const auto &[layout_idx, layout_entry] = layout_schema[tuple_entry.id];
1010 M_insist(*tuple_entry.type == *layout_entry.type);
1011 if (layout_entry.nullable()) { // layout entry may be NULL
1012 const uint8_t static_bit_offset = (leaf_info.offset_in_bits + layout_idx) % 8;
1013 const int64_t static_byte_offset = (leaf_info.offset_in_bits + layout_idx) / 8;
1014 if constexpr (IsStore) {
1015 /*----- Store NULL bit depending on its type. -----*/
1016 auto store = overloaded{
1017 [&]<sql_type T>() {
1018 BLOCK_OPEN(stores) {
1019 auto [value, is_null] = env.get<T>(tuple_entry.id).split(); // get value
1020 value.discard(); // handled at entry leaf
1021 if constexpr (L == 1) {
1022 Ptr<U8x1> byte_ptr =
1023 (ptr + static_byte_offset).template to<uint8_t*>(); // compute byte address
1024 setbit<U8x1>(byte_ptr, is_null, static_bit_offset); // update bit
1025 } else {
1026 auto store = [&, is_null, layout_idx]<typename U>() { // copy due to structured binding
1027 using type = typename U::type;
1028 static constexpr std::size_t lanes = U::num_simd_lanes;
1029 Ptr<U> bytes_ptr =
1030 (ptr + leaf_info.offset_in_bits / 8).template to<type*, lanes>(); // compute bytes address
1031 setbit<U>(bytes_ptr, is_null, layout_idx); // update bits
1032 };
1033 switch (ceil_to_pow_2(tuple_value_schema.num_entries())) {
1034 default: store.template operator()<U8 <L>>(); break; // <= 8
1035 case 16: store.template operator()<U16<L>>(); break;
1036 case 32: store.template operator()<U32<L>>(); break;
1037 case 64: store.template operator()<U64<L>>(); break;
1038 }
1039 }
1040 }
1041 },
1042 []<typename>() {
1043 M_unreachable("invalid type for given number of SIMD lanes");
1044 }
1045 };
1047 [&](const Boolean&) { store.template operator()<_Bool<L>>(); },
1048 [&](const Numeric &n) {
1049 switch (n.kind) {
1050 case Numeric::N_Int:
1051 case Numeric::N_Decimal:
1052 switch (n.size()) {
1053 default: M_unreachable("invalid size");
1054 case 8: store.template operator()<_I8 <L>>(); break;
1055 case 16: store.template operator()<_I16<L>>(); break;
1056 case 32: store.template operator()<_I32<L>>(); break;
1057 case 64: store.template operator()<_I64<L>>(); break;
1058 }
1059 break;
1060 case Numeric::N_Float:
1061 if (n.size() <= 32)
1062 store.template operator()<_Float<L>>();
1063 else
1064 store.template operator()<_Double<L>>();
1065 }
1066 },
1067 [&](const CharacterSequence&) {
1068 M_insist(L == 1, "string SIMDfication currently not supported");
1069 BLOCK_OPEN(stores) {
1070 auto value = env.get<NChar>(tuple_entry.id); // get value
1071 Ptr<U8x1> byte_ptr =
1072 (ptr + static_byte_offset).template to<uint8_t*>(); // compute byte address
1073 setbit<U8x1>(byte_ptr, value.is_null(), static_bit_offset); // update bit
1074 }
1075 },
1076 [&](const Date&) { store.template operator()<_I32<L>>(); },
1077 [&](const DateTime&) { store.template operator()<_I64<L>>(); },
1078 [](auto&&) { M_unreachable("invalid type"); },
1079 }, *tuple_entry.type);
1080 } else {
1081 /*----- Load NULL bit. -----*/
1082 BLOCK_OPEN(loads) {
1083 if constexpr (L == 1) {
1084 auto [it, inserted] = loaded_bytes.try_emplace(static_byte_offset);
1085 if (inserted)
1086 it->second = *(ptr + static_byte_offset).template to<uint8_t*>(); // load the byte
1087 const auto &byte = it->second;
1088 const uint8_t static_mask = 1U << static_bit_offset;
1089 Var<Boolx1> value((byte bitand static_mask).to<bool>()); // mask bit with static mask
1090 new (&null_bits[tuple_idx]) Boolx1(value);
1091 /* Address for NULL bits not yet supported. */
1092 } else {
1093 std::visit(overloaded{
1094 [&, layout_idx]<typename T> // copy due to structured binding
1096 requires (L >= 16) {
1097 PrimitiveExpr<T, L> static_mask(1U << layout_idx);
1099 (_bytes bitand static_mask).template to<bool>() // mask bits with static mask
1100 );
1101 new (&null_bits[tuple_idx]) Bool<L>(value);
1102 /* Address for NULL bits not yet supported. */
1103 },
1104 [](auto&) { M_unreachable("invalid number of SIMD lanes"); },
1105 [](std::monostate&) { M_unreachable("invalid variant"); },
1106 }, const_cast<const bytes_t&>(bytes));
1107 }
1108 }
1109 }
1110 } else { // entry must not be NULL
1111#ifndef NDEBUG
1112 if constexpr (IsStore) {
1113 /*----- Check that value is also not NULL. -----*/
1114 auto check = overloaded{
1115 [&, layout_entry]<sql_type T>() { // copy due to structured binding
1116 BLOCK_OPEN(stores) {
1117 Wasm_insist(env.get<T>(layout_entry.id).not_null(),
1118 "value of non-nullable entry must not be nullable");
1119 }
1120 },
1121 []<typename>() {
1122 M_unreachable("invalid type for given number of SIMD lanes");
1123 }
1124 };
1126 [&](const Boolean&) { check.template operator()<_Bool<L>>(); },
1127 [&](const Numeric &n) {
1128 switch (n.kind) {
1129 case Numeric::N_Int:
1130 case Numeric::N_Decimal:
1131 switch (n.size()) {
1132 default: M_unreachable("invalid size");
1133 case 8: check.template operator()<_I8 <L>>(); break;
1134 case 16: check.template operator()<_I16<L>>(); break;
1135 case 32: check.template operator()<_I32<L>>(); break;
1136 case 64: check.template operator()<_I64<L>>(); break;
1137 }
1138 break;
1139 case Numeric::N_Float:
1140 if (n.size() <= 32)
1141 check.template operator()<_Float<L>>();
1142 else
1143 check.template operator()<_Double<L>>();
1144 }
1145 },
1146 [&](const CharacterSequence&) { check.template operator()<NChar>(); },
1147 [&](const Date&) { check.template operator()<_I32<L>>(); },
1148 [&](const DateTime&) { check.template operator()<_I64<L>>(); },
1149 [](auto&&) { M_unreachable("invalid type"); },
1150 }, *tuple_entry.type);
1151 }
1152#endif
1153 }
1154 }
1155 }
1156 } else { // regular entry
1157 auto &layout_entry = layout_schema[leaf_info.leaf.index()];
1158 M_insist(*layout_entry.type == *leaf_info.leaf.type());
1159 auto tuple_value_it = tuple_value_schema.find(layout_entry.id);
1160 auto tuple_addr_it = tuple_addr_schema.find(layout_entry.id);
1161 if (tuple_value_it == tuple_value_schema.end() and tuple_addr_it == tuple_addr_schema.end())
1162 continue; // entry not contained in both tuple schemas
1163 auto tuple_it = tuple_value_it != tuple_value_schema.end() ? tuple_value_it : tuple_addr_it;
1164 M_insist(*tuple_it->type == *layout_entry.type);
1165 const auto tuple_value_idx = std::distance(tuple_value_schema.begin(), tuple_value_it);
1166 const auto tuple_addr_idx = std::distance(tuple_addr_schema.begin(), tuple_addr_it);
1167
1168 if (bit_stride) { // entry with bit stride requires dynamic masking (for scalar loading)
1169 M_insist(tuple_it->type->is_boolean(),
1170 "leaf bit stride currently only for `Boolean` supported");
1171 M_insist(L == 1 or L >= 16,
1172 "booleans must fill at least an entire SIMD vector when loading SIMDfied");
1173 M_insist(L <= 64, "bytes containing booleans must fit into scalar value when loading SIMDfied");
1174 M_insist(L == 1 or leaf_info.stride_in_bits == 1,
1175 "booleans must be packed consecutively when loading SIMDfied");
1176
1177 M_insist(bool(inode_iter), "stride requires repetition");
1178 U64x1 leaf_offset_in_bits = leaf_info.offset_in_bits + *inode_iter * leaf_info.stride_in_bits;
1179 U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>() ; // mod 8
1180 I64x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed(); // div 8
1181
1182 if constexpr (L > 1) {
1183 BLOCK_OPEN(inits) {
1184 Wasm_insist(leaf_bit_offset == 0U,
1185 "booleans must not start with bit offset when loading SIMDfied");
1186 }
1187 }
1188
1189 key_t key(leaf_info.offset_in_bits % 8, leaf_info.stride_in_bits);
1190 auto [it, inserted] =
1191 M_CONSTEXPR_COND(PointerSharing,
1192 loading_context.try_emplace(std::move(key)),
1193 std::make_pair(loading_context.emplace(loading_context.end(), std::move(key), value_t()), true));
1194 M_insist(inserted == not it->second.mask);
1195 if (inserted) {
1196 BLOCK_OPEN(inits) {
1197 /* do not add `leaf_byte_offset` to pointer here as it may be different for shared entries */
1198 it->second.ptr = base_address.clone() + *inode_byte_offset;
1199 it->second.mask.emplace(); // default-construct for globals to be able to use assignment below
1200 if constexpr (L == 1)
1201 *it->second.mask = 1U << leaf_bit_offset; // init mask for scalar loading
1202 /* no dynamic mask required for SIMDfied loading */
1203 }
1204 } else {
1205 leaf_bit_offset.discard();
1206 }
1207 const auto &ptr = it->second.ptr;
1208
1209 if constexpr (IsStore) {
1210 if constexpr (sql_type<_Bool<L>>) {
1211 /*----- Store value. -----*/
1212 BLOCK_OPEN(stores) {
1213 auto [value, is_null] = env.get<_Bool<L>>(tuple_it->id).split(); // get value
1214 is_null.discard(); // handled at NULL bitmap leaf
1215 if constexpr (L == 1) {
1216 Ptr<U8x1> byte_ptr =
1217 (ptr + leaf_byte_offset).template to<uint8_t*>(); // compute byte address
1218 const auto &mask = *it->second.mask;
1219 setbit(byte_ptr, value, mask.template to<uint8_t>()); // update bit
1220 } else {
1221 using bytes_t = uint_t<L / 8>;
1222 Ptr<PrimitiveExpr<bytes_t>> bytes_ptr =
1223 (ptr + leaf_byte_offset).template to<bytes_t*>(); // compute bytes address
1224 *bytes_ptr = value.bitmask().template to<bytes_t>(); // update all bits at once
1225 }
1226 }
1227 } else {
1228 M_unreachable("invalid type for given number of SIMD lanes");
1229 }
1230 } else {
1231 if constexpr (sql_type<_Bool<L>>) {
1232 /*----- Load value. -----*/
1233 BLOCK_OPEN(loads) {
1234 if constexpr (L == 1) {
1235 U8x1 byte = *(ptr + leaf_byte_offset).template to<uint8_t*>(); // load byte
1236 const auto &mask = *it->second.mask;
1237 if (tuple_value_it != tuple_value_schema.end()) {
1239 (byte.clone() bitand mask.template to<uint8_t>()).template to<bool>() // mask bit with dynamic mask
1240 );
1241 new (&values[tuple_value_idx]) SQL_t(_Boolx1(value));
1242 }
1243 /* Address for booleans not yet supported. */
1244 byte.discard();
1245 } else {
1246 using bytes_t = uint_t<L / 8>;
1247 const Var<PrimitiveExpr<bytes_t>> bytes(
1248 *(ptr + leaf_byte_offset).template to<bytes_t*>() // load bytes
1249 ); // create local variable to avoid cloning when broadcasting `bytes`
1250 auto create_mask = [&]<std::size_t... Is>(std::index_sequence<Is...>) {
1251 return PrimitiveExpr<bytes_t, L>(bytes_t(1UL << Is)...);
1252 };
1253 auto static_mask = create_mask(std::make_index_sequence<L>());
1254 if (tuple_value_it != tuple_value_schema.end()) {
1256 (bytes.template broadcast<L>() bitand static_mask).template to<bool>() // mask bits with static mask
1257 );
1258 new (&values[tuple_value_idx]) SQL_t(_Bool<L>(value));
1259 } else {
1260 bytes.val().discard(); // XXX: remove once address for booleans are supported
1261 }
1262 /* Address for booleans not yet supported. */
1263 }
1264 }
1265 } else {
1266 M_unreachable("invalid type for given number of SIMD lanes");
1267 }
1268 }
1269 } else { // entry without bit stride; if masking is required, we can use a static mask
1270 auto byte_offset = [&]() -> I64x1 {
1271 if (inode_iter and leaf_info.stride_in_bits) {
1272 /* omit `leaf_info.offset_in_bits` here to use it as static offset and mask;
1273 * this is valid since no bit stride means that the leaf byte offset computation is
1274 * independent of the static parts */
1275 U64x1 leaf_offset_in_bits = *inode_iter * leaf_info.stride_in_bits;
1276 U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
1277 I64x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed(); // div 8
1278 BLOCK_OPEN(inits) {
1279 Wasm_insist(leaf_bit_offset == 0U, "no leaf bit offset without bit stride");
1280 }
1281 return *inode_byte_offset + leaf_byte_offset;
1282 } else {
1283 return *inode_byte_offset;
1284 }
1285 }();
1286
1287 const uint8_t static_bit_offset = leaf_info.offset_in_bits % 8;
1288 const int64_t static_byte_offset = leaf_info.offset_in_bits / 8;
1289
1290 key_t key(leaf_info.offset_in_bits % 8, leaf_info.stride_in_bits);
1291 auto [it, inserted] =
1292 M_CONSTEXPR_COND(PointerSharing,
1293 loading_context.try_emplace(std::move(key)),
1294 std::make_pair(loading_context.emplace(loading_context.end(), std::move(key), value_t()), true));
1295 if (inserted) {
1296 BLOCK_OPEN(inits) {
1297 it->second.ptr = base_address.clone() + byte_offset;
1298 }
1299 } else {
1300 byte_offset.discard();
1301 }
1302 const auto &ptr = it->second.ptr;
1303
1304 /*----- Store value depending on its type. -----*/
1305 auto store = overloaded{
1306 [&]<sql_type T>() {
1307 using type = typename T::type;
1308 static constexpr std::size_t lanes = T::num_simd_lanes;
1309 M_insist(static_bit_offset == 0,
1310 "leaf offset of `Numeric`, `Date`, or `DateTime` must be byte aligned");
1311 BLOCK_OPEN(stores) {
1312 auto [value, is_null] = env.get<T>(tuple_it->id).split(); // get value
1313 is_null.discard(); // handled at NULL bitmap leaf
1314 *(ptr + static_byte_offset).template to<type*, lanes>() = value;
1315 }
1316 },
1317 []<typename>() {
1318 M_unreachable("invalid type for given number of SIMD lanes");
1319 }
1320 };
1321 /*----- Load value depending on its type. -----*/
1322 auto load = overloaded{
1323 [&]<sql_type T>() {
1324 using type = typename T::type;
1325 static constexpr std::size_t lanes = T::num_simd_lanes;
1326 M_insist(static_bit_offset == 0,
1327 "leaf offset of `Numeric`, `Date`, or `DateTime` must be byte aligned");
1328 BLOCK_OPEN(loads) {
1329 if (tuple_value_it != tuple_value_schema.end()) {
1331 *(ptr + static_byte_offset).template to<type*, lanes>()
1332 );
1333 new (&values[tuple_value_idx]) SQL_t(T(value));
1334 }
1335 if (tuple_addr_it != tuple_addr_schema.end())
1336 new (&addrs[tuple_addr_idx]) SQL_addr_t(
1337 (ptr + static_byte_offset).template to<type*, lanes>()
1338 );
1339 }
1340 },
1341 []<typename>() {
1342 M_unreachable("invalid type for given number of SIMD lanes");
1343 }
1344 };
1345 /*----- Select call target (store or load) and visit attribute type. -----*/
1346#define CALL(TYPE) if constexpr (IsStore) store.template operator()<TYPE>(); else load.template operator()<TYPE>()
1348 [&](const Boolean&) {
1349 M_insist(L == 1 or leaf_info.stride_in_bits == 8,
1350 "booleans must be packed consecutively in bytes when loading SIMDfied");
1351 if constexpr (sql_type<_Bool<L>>) {
1352 if constexpr (IsStore) {
1353 /*----- Store value. -----*/
1354 BLOCK_OPEN(stores) {
1355 auto [value, is_null] = env.get<_Bool<L>>(tuple_it->id).split(); // get value
1356 is_null.discard(); // handled at NULL bitmap leaf
1357 Ptr<U8<L>> byte_ptr =
1358 (ptr + static_byte_offset).template to<uint8_t*, L>(); // compute byte address
1359 setbit<U8<L>>(byte_ptr, value, static_bit_offset); // update bit
1360 }
1361 } else {
1362 /*----- Load value. -----*/
1363 BLOCK_OPEN(loads) {
1364 U8<L> byte =
1365 *(ptr + static_byte_offset).template to<uint8_t*, L>(); // load byte
1366 U8<L> static_mask(1U << static_bit_offset);
1367
1368 if (tuple_value_it != tuple_value_schema.end()) {
1370 (byte.clone() bitand static_mask.clone()).template to<bool>() // mask bit with static mask
1371 );
1372 new (&values[tuple_value_idx]) SQL_t(_Bool<L>(value));
1373 }
1374 /* Address for booleans not yet supported. */
1375 byte.discard();
1376 static_mask.discard();
1377 }
1378 }
1379 } else {
1380 M_unreachable("invalid type for given number of SIMD lanes");
1381 }
1382 },
1383 [&](const Numeric &n) {
1384 switch (n.kind) {
1385 case Numeric::N_Int:
1386 case Numeric::N_Decimal:
1387 switch (n.size()) {
1388 default: M_unreachable("invalid size");
1389 case 8: CALL(_I8 <L>); break;
1390 case 16: CALL(_I16<L>); break;
1391 case 32: CALL(_I32<L>); break;
1392 case 64: CALL(_I64<L>); break;
1393 }
1394 break;
1395 case Numeric::N_Float:
1396 if (n.size() <= 32)
1397 CALL(_Float<L>);
1398 else
1399 CALL(_Double<L>);
1400 }
1401 },
1402 [&](const CharacterSequence &cs) {
1403 M_insist(L == 1, "string SIMDfication currently not supported");
1404 M_insist(static_bit_offset == 0, "leaf offset of `CharacterSequence` must be byte aligned");
1405 if constexpr (IsStore) {
1406 /*----- Store value. -----*/
1407 BLOCK_OPEN(stores) {
1408 auto value = env.get<NChar>(tuple_it->id); // get value
1409 IF (value.clone().not_null()) {
1410 Ptr<Charx1> address((ptr + static_byte_offset).template to<char*>());
1411 strncpy(address, value, U32x1(cs.size() / 8)).discard();
1412 };
1413 }
1414 } else {
1415 /*----- Load value. -----*/
1416 BLOCK_OPEN(loads) {
1417 Ptr<Charx1> address((ptr + static_byte_offset).template to<char*>());
1418 new (&values[tuple_value_idx]) SQL_t(
1419 NChar(address, layout_entry.nullable(), cs.length, cs.is_varying)
1420 );
1421 /* Omit addresses for character sequences. */
1422 }
1423 }
1424 },
1425 [&](const Date&) { CALL(_I32<L>); },
1426 [&](const DateTime&) { CALL(_I64<L>); },
1427 [](auto&&) { M_unreachable("invalid type"); },
1428 }, *tuple_it->type);
1429#undef CALL
1430 }
1431 }
1432 }
1433
1434 /*----- Recursive lambda to emit stride jumps by processing path from leaves (excluding) to the root. -----*/
1435 auto emit_stride_jumps = [&](decltype(levels.crbegin()) curr, const decltype(levels.crend()) end) -> void {
1436 auto rec = [&](decltype(levels.crbegin()) curr, const decltype(levels.crend()) end, auto rec) -> void {
1437 if (curr == end) return;
1438
1439 const auto inner = std::prev(curr); // the child INode of `curr`
1440 M_insist(curr->num_tuples % inner->num_tuples == 0, "curr must be whole multiple of inner");
1441
1442 /*----- Compute remaining stride for this level. -----*/
1443 const auto num_repetition_inner = curr->num_tuples / inner->num_tuples;
1444 const auto stride_remaining_in_bits = curr->stride_in_bits -
1445 num_repetition_inner * inner->stride_in_bits;
1446 M_insist(stride_remaining_in_bits % 8 == 0,
1447 "remaining stride of INodes must be whole multiple of a byte");
1448
1449 /*----- If there is a remaining stride for this level, emit conditional stride jump. -----*/
1450 if (const int64_t remaining_stride_in_bytes = stride_remaining_in_bits / 8) [[likely]] {
1451 M_insist(curr->num_tuples > 0);
1452 if (curr->num_tuples != 1U) {
1453 Boolx1 cond_mod = (tuple_id % uint64_t(curr->num_tuples)).eqz();
1454 Boolx1 cond_and = (tuple_id bitand uint64_t(curr->num_tuples - 1U)).eqz();
1455 const bool use_and = is_pow_2(curr->num_tuples) and options::remainder_removal;
1456 Boolx1 cond = use_and ? cond_and : cond_mod; // select implementation to use...
1457 (use_and ? cond_mod : cond_and).discard(); // ... and discard the other
1458
1459 /*----- Emit conditional stride jumps. -----*/
1460 IF (cond) {
1461 for (auto &[_, value] : loading_context) {
1462 if (is_predicated) {
1463 M_insist(bool(pred));
1464 value.ptr += Select(*pred, remaining_stride_in_bytes, 0); // possibly emit stride jump
1465 } else {
1466 value.ptr += remaining_stride_in_bytes; // emit stride jump
1467 }
1468 }
1469 if (null_bitmap_ptr) {
1470 if (is_predicated) {
1471 M_insist(bool(pred));
1472 *null_bitmap_ptr +=
1473 Select(*pred, remaining_stride_in_bytes, 0); // possibly emit stride jump
1474 } else {
1475 *null_bitmap_ptr += remaining_stride_in_bytes; // emit stride jump
1476 }
1477 }
1478
1479 /*----- Recurse within IF. -----*/
1480 rec(std::next(curr), end, rec);
1481 };
1482 } else {
1483 for (auto &[_, value] : loading_context) {
1484 if (is_predicated) {
1485 M_insist(bool(pred));
1486 value.ptr += Select(*pred, remaining_stride_in_bytes, 0); // possibly emit stride jump
1487 } else {
1488 value.ptr += remaining_stride_in_bytes; // emit stride jump
1489 }
1490 }
1491 if (null_bitmap_ptr) {
1492 if (is_predicated) {
1493 M_insist(bool(pred));
1494 *null_bitmap_ptr +=
1495 Select(*pred, remaining_stride_in_bytes, 0); // possibly emit stride jump
1496 } else {
1497 *null_bitmap_ptr += remaining_stride_in_bytes; // emit stride jump
1498 }
1499 }
1500
1501 /*----- Recurse within IF. -----*/
1502 rec(std::next(curr), end, rec);
1503 }
1504 } else {
1505 /*----- Recurse without IF. -----*/
1506 rec(std::next(curr), end, rec);
1507 }
1508
1509 };
1510 rec(curr, end, rec);
1511 };
1512
1513 /*----- Process path from DataLayout leaves to the root to emit stride jumps. -----*/
1514 BLOCK_OPEN(jumps) {
1515 /*----- Emit the per-leaf stride jumps, i.e. from one instance of the leaf to the next. -----*/
1516 for (auto &[key, value] : loading_context) {
1517 const uint8_t bit_stride = key.second % 8;
1518 const int64_t byte_stride = key.second / 8;
1519 if (bit_stride) {
1520 M_insist(L == 1);
1521 M_insist(bool(value.mask));
1522 if (is_predicated) {
1523 M_insist(bool(pred));
1524 *value.mask <<= Select(*pred, bit_stride, uint8_t(0)); // possibly advance mask
1525 } else {
1526 *value.mask <<= bit_stride; // advance mask
1527 }
1528 /* If the mask surpasses the first byte, advance pointer to the next byte... */
1529 value.ptr += (*value.mask bitand 0xffU).eqz().template to<int64_t>();
1530 /* ... and remove the lowest byte from the mask. */
1531 *value.mask = Select((*value.mask bitand 0xffU).eqz(), *value.mask >> 8U, *value.mask);
1532 }
1533 if (byte_stride) [[likely]] {
1534 if (is_predicated) {
1535 M_insist(L == 1);
1536 M_insist(bool(pred));
1537 value.ptr += Select(*pred, byte_stride, 0); // possibly advance pointer
1538 } else {
1539 value.ptr += int64_t(L) * byte_stride; // advance pointer
1540 }
1541 }
1542 }
1543 /* Omit the leaf stride jump for the NULL bitmap as it is already done together with the loading. */
1544
1545 if (not levels.empty()) {
1546 /*----- Emit the stride jumps between each leaf to the beginning of the parent INode. -----*/
1547 Block lowest_inode_jumps(false);
1548 for (auto &[key, value] : loading_context) {
1549 M_insist(levels.back().stride_in_bits % 8 == 0,
1550 "stride of INodes must be multiples of a whole byte");
1551 const auto stride_remaining_in_bits = levels.back().stride_in_bits -
1552 levels.back().num_tuples * key.second;
1553 const uint8_t remaining_bit_stride = stride_remaining_in_bits % 8;
1554 const int64_t remaining_byte_stride = stride_remaining_in_bits / 8;
1555 if (remaining_bit_stride) {
1556 M_insist(L == 1);
1557 M_insist(bool(value.mask));
1558 BLOCK_OPEN(lowest_inode_jumps) {
1559 const uint8_t end_bit_offset = (key.first + levels.back().num_tuples * key.second) % 8;
1560 M_insist(end_bit_offset != key.first);
1561 /* Reset the mask to initial bit offset... */
1562 if (is_predicated) {
1563 M_insist(bool(pred));
1564 Wasm_insist(*pred or *value.mask == 1U << key.first,
1565 "if the predicate is not fulfilled, the mask should not be advanced");
1566 }
1567 *value.mask = 1U << key.first;
1568 /* ... and advance pointer to next byte if resetting of the mask surpasses the current byte. */
1569 if (is_predicated) {
1570 M_insist(bool(pred));
1571 value.ptr += Select(*pred, int64_t(end_bit_offset > key.first), 0);
1572 } else {
1573 value.ptr += int64_t(end_bit_offset > key.first);
1574 }
1575 }
1576 }
1577 if (remaining_byte_stride) [[likely]] {
1578 BLOCK_OPEN(lowest_inode_jumps) {
1579 if (is_predicated) {
1580 M_insist(bool(pred));
1581 value.ptr +=
1582 Select(*pred, remaining_byte_stride, 0); // possibly advance pointer
1583 } else {
1584 value.ptr += remaining_byte_stride; // advance pointer
1585 }
1586 }
1587 }
1588 }
1589 if (null_bitmap_ptr) {
1590 M_insist(L == 1);
1591 M_insist(bool(null_bitmap_mask));
1592 M_insist(levels.back().stride_in_bits % 8 == 0,
1593 "stride of INodes must be multiples of a whole byte");
1594 const auto stride_remaining_in_bits = levels.back().stride_in_bits -
1595 levels.back().num_tuples * null_bitmap_stride_in_bits;
1596 const uint8_t remaining_bit_stride = stride_remaining_in_bits % 8;
1597 const int64_t remaining_byte_stride = stride_remaining_in_bits / 8;
1598 if (remaining_bit_stride) {
1599 BLOCK_OPEN(lowest_inode_jumps) {
1600 const uint8_t end_bit_offset =
1601 (null_bitmap_bit_offset + levels.back().num_tuples * null_bitmap_stride_in_bits) % 8;
1602 M_insist(end_bit_offset != null_bitmap_bit_offset);
1603 /* Reset the mask to initial bit offset... */
1604 if (is_predicated) {
1605 M_insist(bool(pred));
1606 Wasm_insist(*pred or *null_bitmap_mask == 1U << null_bitmap_bit_offset,
1607 "if the predicate is not fulfilled, the mask should not be advanced");
1608 }
1609 *null_bitmap_mask = 1U << null_bitmap_bit_offset;
1610 /* ... and advance pointer to next byte if resetting of the mask surpasses the current byte. */
1611 if (is_predicated) {
1612 M_insist(bool(pred));
1613 *null_bitmap_ptr +=
1614 Select(*pred, int64_t(end_bit_offset > null_bitmap_bit_offset), 0);
1615 } else {
1616 *null_bitmap_ptr += int64_t(end_bit_offset > null_bitmap_bit_offset);
1617 }
1618 }
1619 }
1620 if (remaining_byte_stride) [[likely]] {
1621 BLOCK_OPEN(lowest_inode_jumps) {
1622 if (is_predicated) {
1623 M_insist(bool(pred));
1624 *null_bitmap_ptr +=
1625 Select(*pred, remaining_byte_stride, 0); // possibly advance pointer
1626 } else {
1627 *null_bitmap_ptr += remaining_byte_stride; // advance pointer
1628 }
1629 }
1630 }
1631 }
1632
1633 /*----- Emit the stride jumps between all INodes starting at the parent of leaves to the root. -----*/
1634 if (not lowest_inode_jumps.empty()) [[likely]] {
1635 M_insist(levels.back().num_tuples > 0);
1636 if (levels.back().num_tuples != 1U) {
1637 Boolx1 cond_mod = (tuple_id % uint64_t(levels.back().num_tuples)).eqz();
1638 Boolx1 cond_and = (tuple_id bitand uint64_t(levels.back().num_tuples - 1U)).eqz();
1639 const bool use_and = is_pow_2(levels.back().num_tuples) and options::remainder_removal;
1640 Boolx1 cond = use_and ? cond_and : cond_mod; // select implementation to use...
1641 (use_and ? cond_mod : cond_and).discard(); // ... and discard the other
1642
1643 /*----- Emit conditional stride jumps from outermost Block. -----*/
1644 IF (cond) {
1645 lowest_inode_jumps.attach_to_current();
1646
1647 /*----- Recurse within IF. -----*/
1648 emit_stride_jumps(std::next(levels.crbegin()), levels.crend());
1649 };
1650 } else {
1651 lowest_inode_jumps.attach_to_current();
1652
1653 /*----- Recurse within IF. -----*/
1654 emit_stride_jumps(std::next(levels.crbegin()), levels.crend());
1655 }
1656 } else {
1657 /*----- Recurse without outermost IF block. -----*/
1658 emit_stride_jumps(std::next(levels.crbegin()), levels.crend());
1659 }
1660 }
1661 }
1662 });
1663
1664 if constexpr (not IsStore) {
1665 /*----- Combine actual values and possible NULL bits to a new `SQL_t` and add this to the environment. -----*/
1666 for (std::size_t idx = 0; idx != tuple_value_schema.num_entries(); ++idx) {
1667 auto &tuple_entry = tuple_value_schema[idx];
1668 std::visit(overloaded{
1669 [&]<typename T>(Expr<T, L> value) {
1670 BLOCK_OPEN(loads) {
1671 if (has_null_bitmap and layout_schema[tuple_entry.id].second.nullable()) {
1672 Expr<T, L> combined(value.insist_not_null(), null_bits[idx]);
1673 env.add(tuple_entry.id, combined);
1674 } else {
1675 env.add(tuple_entry.id, value);
1676 }
1677 }
1678 },
1679 [&](NChar value) {
1680 if constexpr (L == 1) {
1681 BLOCK_OPEN(loads) {
1682 if (has_null_bitmap and layout_schema[tuple_entry.id].second.nullable()) {
1683 /* introduce variable s.t. uses only load from it */
1684 Var<Ptr<Charx1>> combined(Select(null_bits[idx], Ptr<Charx1>::Nullptr(), value.val()));
1685 env.add(tuple_entry.id, NChar(combined, /* can_be_null=*/ true, value.length(),
1686 value.guarantees_terminating_nul()));
1687 } else {
1688 Var<Ptr<Charx1>> _value(value.val()); // introduce variable s.t. uses only load from it
1689 env.add(tuple_entry.id, NChar(_value, /* can_be_null=*/ false, value.length(),
1690 value.guarantees_terminating_nul()));
1691 }
1692 }
1693 } else {
1694 M_unreachable("string SIMDfication currently not supported");
1695 }
1696 },
1697 [](auto) { M_unreachable("value must be loaded beforehand"); },
1698 [](std::monostate) { M_unreachable("invalid variant"); },
1699 }, values[idx]);
1700 }
1701
1702 /*----- Add addresses to the environment. -----*/
1703 for (std::size_t idx = 0; idx != tuple_addr_schema.num_entries(); ++idx) {
1704 BLOCK_OPEN(loads) {
1705 auto &tuple_entry = tuple_addr_schema[idx];
1706 env.add_addr(tuple_entry.id, std::move(addrs[idx]));
1707 }
1708 }
1709 }
1710
1711 /*----- Destroy created values. -----*/
1712 for (std::size_t idx = 0; idx < tuple_value_schema.num_entries(); ++idx)
1713 values[idx].~SQL_t();
1714 for (std::size_t idx = 0; idx < tuple_addr_schema.num_entries(); ++idx)
1715 addrs[idx].~SQL_addr_t();
1716 if constexpr (not IsStore) {
1717 /*----- Destroy created NULL bits. -----*/
1718 for (std::size_t idx = 0; idx != tuple_value_schema.num_entries(); ++idx) {
1719 if (has_null_bitmap and layout_schema[tuple_value_schema[idx].id].second.nullable())
1720 null_bits[idx].~Bool<L>();
1721 }
1722 }
1723 base_address.discard(); // discard base address (as it was always cloned)
1724
1725#ifndef NDEBUG
1726 if constexpr (IsStore)
1727 M_insist(loads.empty());
1728 else
1729 M_insist(stores.empty());
1730#endif
1731
1732 if constexpr (IsStore)
1733 return std::make_tuple<Block, Block, Block>(std::move(inits), std::move(stores), std::move(jumps));
1734 else
1735 return std::make_tuple<Block, Block, Block>(std::move(inits), std::move(loads), std::move(jumps));
1736}
1737
1738}
1739
1740}
1741
/** Compiles a sequential, tuple-ID-driven *store* into the given data layout.
 *
 * Pure dispatcher: it maps the runtime `num_simd_lanes` value (a power of two in [1, 32]) and the
 * `--no-pointer-sharing` command-line option (see `options::pointer_sharing`) onto the matching
 * compile-time instantiation of `compile_data_layout_sequential<IsStore, NumLanes, _, PointerSharing>`.
 * The first template argument is `true`, i.e. a store is compiled.
 * NOTE(review): the third template argument is `false` here but `true` in
 * `compile_store_sequential_single_pass` and `compile_load_sequential` below — presumably it selects
 * single-pass traversal of the layout; confirm against the declaration of
 * `compile_data_layout_sequential`.
 * Any other lane count is a logic error and trapped via `M_unreachable`. */
template<VariableKind Kind>
std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block>
m::wasm::compile_store_sequential(const Schema &tuple_value_schema, const Schema &tuple_addr_schema,
 Ptr<void> base_address, const storage::DataLayout &layout, std::size_t num_simd_lanes,
{
 /* Select pointer-sharing vs. non-pointer-sharing instantiations up front, so each `case`
 * below only varies in the `NumLanes` template argument. */
 if (options::pointer_sharing) {
 switch (num_simd_lanes) {
 default: M_unreachable("unsupported number of SIMD lanes");
 case 1: return compile_data_layout_sequential<true, 1, false, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 2: return compile_data_layout_sequential<true, 2, false, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 4: return compile_data_layout_sequential<true, 4, false, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 8: return compile_data_layout_sequential<true, 8, false, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 16: return compile_data_layout_sequential<true, 16, false, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 32: return compile_data_layout_sequential<true, 32, false, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 }
 } else {
 switch (num_simd_lanes) {
 default: M_unreachable("unsupported number of SIMD lanes");
 case 1: return compile_data_layout_sequential<true, 1, false, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 2: return compile_data_layout_sequential<true, 2, false, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 4: return compile_data_layout_sequential<true, 4, false, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 8: return compile_data_layout_sequential<true, 8, false, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 16: return compile_data_layout_sequential<true, 16, false, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 32: return compile_data_layout_sequential<true, 32, false, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 }
 }
}
1794
/** Compiles a sequential, tuple-ID-driven *store* into the given data layout, single-pass variant.
 *
 * Pure dispatcher, identical in structure to `compile_store_sequential` above, except that the
 * third template argument of `compile_data_layout_sequential` is `true` instead of `false`.
 * NOTE(review): that argument presumably enables single-pass traversal of the layout — confirm
 * against the declaration of `compile_data_layout_sequential`.
 * Supported lane counts are the powers of two in [1, 32]; anything else hits `M_unreachable`. */
template<VariableKind Kind>
std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block>
m::wasm::compile_store_sequential_single_pass(const Schema &tuple_value_schema, const Schema &tuple_addr_schema,
 Ptr<void> base_address, const storage::DataLayout &layout,
 std::size_t num_simd_lanes, const Schema &layout_schema,
{
 /* Outer branch fixes the `PointerSharing` template argument; the inner switch fixes `NumLanes`. */
 if (options::pointer_sharing) {
 switch (num_simd_lanes) {
 default: M_unreachable("unsupported number of SIMD lanes");
 case 1: return compile_data_layout_sequential<true, 1, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 2: return compile_data_layout_sequential<true, 2, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 4: return compile_data_layout_sequential<true, 4, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 8: return compile_data_layout_sequential<true, 8, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 16: return compile_data_layout_sequential<true, 16, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 32: return compile_data_layout_sequential<true, 32, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 }
 } else {
 switch (num_simd_lanes) {
 default: M_unreachable("unsupported number of SIMD lanes");
 case 1: return compile_data_layout_sequential<true, 1, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 2: return compile_data_layout_sequential<true, 2, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 4: return compile_data_layout_sequential<true, 4, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 8: return compile_data_layout_sequential<true, 8, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 16: return compile_data_layout_sequential<true, 16, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 32: return compile_data_layout_sequential<true, 32, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 }
 }
}
1848
/** Compiles a sequential, tuple-ID-driven *load* from the given data layout.
 *
 * Pure dispatcher, mirroring `compile_store_sequential_single_pass` with the first template
 * argument of `compile_data_layout_sequential` set to `false`, i.e. a load is compiled instead of
 * a store.  The runtime `num_simd_lanes` value (a power of two in [1, 32]) and the
 * `--no-pointer-sharing` option select the concrete compile-time instantiation; any other lane
 * count is trapped via `M_unreachable`. */
template<VariableKind Kind>
std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block>
m::wasm::compile_load_sequential(const Schema &tuple_value_schema, const Schema &tuple_addr_schema,
 Ptr<void> base_address, const storage::DataLayout &layout, std::size_t num_simd_lanes,
{
 /* Outer branch fixes the `PointerSharing` template argument; the inner switch fixes `NumLanes`. */
 if (options::pointer_sharing) {
 switch (num_simd_lanes) {
 default: M_unreachable("unsupported number of SIMD lanes");
 case 1: return compile_data_layout_sequential<false, 1, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 2: return compile_data_layout_sequential<false, 2, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 4: return compile_data_layout_sequential<false, 4, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 8: return compile_data_layout_sequential<false, 8, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 16: return compile_data_layout_sequential<false, 16, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 case 32: return compile_data_layout_sequential<false, 32, true, true>(tuple_value_schema, tuple_addr_schema,
 base_address, layout, layout_schema,
 tuple_id);
 }
 } else {
 switch (num_simd_lanes) {
 default: M_unreachable("unsupported number of SIMD lanes");
 case 1: return compile_data_layout_sequential<false, 1, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 2: return compile_data_layout_sequential<false, 2, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 4: return compile_data_layout_sequential<false, 4, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 8: return compile_data_layout_sequential<false, 8, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 16: return compile_data_layout_sequential<false, 16, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 case 32: return compile_data_layout_sequential<false, 32, true, false>(tuple_value_schema,
 tuple_addr_schema, base_address,
 layout, layout_schema, tuple_id);
 }
 }
}
1901
// explicit instantiations to prevent linker errors
/* The dispatchers above are function templates defined in this translation unit; instantiate each
 * one for every `VariableKind` used by callers (local `Var<U64x1>`, `Global<U64x1>`, and a third
 * kind whose parameter line is not visible here — presumably a parameter variable; TODO confirm
 * against the header declaring these templates). */
/*----- compile_store_sequential -----*/
template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential(
 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Var<U64x1>&
);
template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential(
 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Global<U64x1>&
);
template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential(
 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&,
);
/*----- compile_store_sequential_single_pass -----*/
template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential_single_pass(
 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Var<U64x1>&
);
template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential_single_pass(
 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Global<U64x1>&
);
template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_store_sequential_single_pass(
 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&,
);
/*----- compile_load_sequential -----*/
template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_load_sequential(
 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Var<U64x1>&
);
template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_load_sequential(
 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&, Global<U64x1>&
);
template std::tuple<m::wasm::Block, m::wasm::Block, m::wasm::Block> m::wasm::compile_load_sequential(
 const Schema&, const Schema&, Ptr<void>, const storage::DataLayout&, std::size_t, const Schema&,
);
1933
1934namespace m {
1935
1936namespace wasm {
1937
1944template<bool IsStore>
1945void compile_data_layout_point_access(const Schema &_tuple_value_schema, const Schema &_tuple_addr_schema,
1946 Ptr<void> base_address, const storage::DataLayout &layout,
1947 const Schema &layout_schema, U64x1 tuple_id)
1948{
1949 const auto tuple_value_schema = _tuple_value_schema.deduplicate().drop_constants();
1950 const auto tuple_addr_schema = _tuple_addr_schema.deduplicate().drop_constants();
1951
1952 M_insist(tuple_value_schema.num_entries() != 0, "point access must access at least one tuple schema entry");
1953 M_insist(not IsStore or tuple_addr_schema.num_entries() == 0, "addresses are only computed for loads");
1954#ifndef NDEBUG
1955 for (auto &e : tuple_value_schema)
1956 M_insist(layout_schema.find(e.id) != layout_schema.cend(), "tuple value schema entry not found");
1957 for (auto &e : tuple_addr_schema) {
1958 auto it = layout_schema.find(e.id);
1959 M_insist(it != layout_schema.cend(), "tuple address schema entry not found");
1960 M_insist(not it->nullable(), "nullable tuple address schema entry not yet supported");
1961 M_insist(not it->type->is_boolean(), "boolean tuple address schema entry not yet supported");
1962 M_insist(not it->type->is_character_sequence(), "character sequence tuple address schema entry omitted");
1963 }
1964#endif
1965
1967 SQL_t values[tuple_value_schema.num_entries()];
1969 SQL_addr_t *addrs;
1970 if (not tuple_addr_schema.empty())
1971 addrs = static_cast<SQL_addr_t*>(alloca(sizeof(SQL_addr_t) * tuple_addr_schema.num_entries()));
1973 Boolx1 *null_bits;
1974 if constexpr (not IsStore)
1975 null_bits = static_cast<Boolx1*>(alloca(sizeof(Boolx1) * tuple_value_schema.num_entries()));
1976
1977 auto &env = CodeGenContext::Get().env(); // the current codegen environment
1978
1979 /*----- Check whether any of the entries in `tuple_value_schema` can be NULL, so that we need the NULL bitmap. -----*/
1980 const bool needs_null_bitmap = [&]() {
1981 for (auto &tuple_entry : tuple_value_schema) {
1982 if (layout_schema[tuple_entry.id].second.nullable())
1983 return true; // found an entry in `tuple_value_schema` that can be NULL according to `layout_schema`
1984 }
1985 return false; // no attribute in `tuple_value_schema` can be NULL according to `layout_schema`
1986 }();
1987 bool has_null_bitmap = false; // indicates whether the data layout specifies a NULL bitmap
1988
1989 /*----- Visit the data layout. -----*/
1990 layout.for_sibling_leaves([&](const std::vector<DataLayout::leaf_info_t> &leaves,
1991 const DataLayout::level_info_stack_t &levels, uint64_t inode_offset_in_bits)
1992 {
1993 /*----- Compute INode pointer and INode iteration depending on the given tuple ID. -----*/
1994 auto compute_additional_inode_byte_offset = [&](U64x1 tuple_id) -> U64x1 {
1995 auto rec = [&](U64x1 curr_tuple_id, decltype(levels.cbegin()) curr, const decltype(levels.cend()) end,
1996 auto rec) -> U64x1
1997 {
1998 if (curr == end) {
1999 Wasm_insist(curr_tuple_id == tuple_id % uint64_t(levels.back().num_tuples));
2000 return U64x1(0);
2001 }
2002
2003 if (is_pow_2(curr->num_tuples)) {
2004 U64x1 child_iter = curr_tuple_id.clone() >> uint64_t(__builtin_ctzl(curr->num_tuples));
2005 U64x1 inner_tuple_id = curr_tuple_id bitand uint64_t(curr->num_tuples - 1U);
2006 M_insist(curr->stride_in_bits % 8 == 0, "INode stride must be byte aligned");
2007 U64x1 offset_in_bytes = child_iter * uint64_t(curr->stride_in_bits / 8);
2008 return offset_in_bytes + rec(inner_tuple_id, std::next(curr), end, rec);
2009 } else {
2010 U64x1 child_iter = curr_tuple_id.clone() / uint64_t(curr->num_tuples);
2011 U64x1 inner_tuple_id = curr_tuple_id % uint64_t(curr->num_tuples);
2012 M_insist(curr->stride_in_bits % 8 == 0, "INode stride must be byte aligned");
2013 U64x1 offset_in_bytes = child_iter * uint64_t(curr->stride_in_bits / 8);
2014 return offset_in_bytes + rec(inner_tuple_id, std::next(curr), end, rec);
2015 }
2016 };
2017 return rec(tuple_id.clone(), levels.cbegin(), levels.cend(), rec);
2018 };
2019 M_insist(inode_offset_in_bits % 8 == 0, "INode offset must be byte aligned");
2020 const Var<Ptr<void>> inode_ptr(
2021 base_address.clone()
2022 + int64_t(inode_offset_in_bits / 8)
2023 + compute_additional_inode_byte_offset(tuple_id.clone()).make_signed()
2024 );
2025 std::optional<const Var<U64x1>> inode_iter;
2026 M_insist(levels.back().num_tuples != 0, "INode must be large enough for at least one tuple");
2027 if (levels.back().num_tuples != 1) {
2028 inode_iter.emplace(
2029 is_pow_2(levels.back().num_tuples) ? tuple_id bitand uint64_t(levels.back().num_tuples - 1U)
2030 : tuple_id % uint64_t(levels.back().num_tuples)
2031 );
2032 } else {
2033 /* omit computation of INode iteration since it is always the first iteration, i.e. equals 0 */
2034 tuple_id.discard();
2035 }
2036
2037 /*----- Iterate over sibling leaves, i.e. leaf children of a common parent INode, to emit code. -----*/
2038 for (auto &leaf_info : leaves) {
2039 const uint8_t bit_stride = leaf_info.stride_in_bits % 8;
2040
2041 if (leaf_info.leaf.index() == layout_schema.num_entries()) { // NULL bitmap
2042 if (not needs_null_bitmap)
2043 continue;
2044
2045 M_insist(not has_null_bitmap, "at most one bitmap may be specified");
2046 has_null_bitmap = true;
2047 if (bit_stride) { // NULL bitmap with bit stride requires dynamic masking
2048 M_insist(bool(inode_iter), "stride requires repetition");
2049 U64x1 leaf_offset_in_bits = leaf_info.offset_in_bits + *inode_iter * leaf_info.stride_in_bits;
2050 const Var<U8x1> leaf_bit_offset(
2051 (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>() // mod 8
2052 );
2053 I64x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed(); // div 8
2054
2055 const Var<Ptr<void>> ptr(inode_ptr + leaf_byte_offset); // pointer to NULL bitmap
2056
2057 /*----- For each tuple entry that can be NULL, create a store/load with offset and mask. --*/
2058 for (std::size_t tuple_idx = 0; tuple_idx != tuple_value_schema.num_entries(); ++tuple_idx) {
2059 auto &tuple_entry = tuple_value_schema[tuple_idx];
2060 const auto &[layout_idx, layout_entry] = layout_schema[tuple_entry.id];
2061 M_insist(*tuple_entry.type == *layout_entry.type);
2062 if (layout_entry.nullable()) { // layout entry may be NULL
2063 U64x1 offset_in_bits = leaf_bit_offset + layout_idx;
2064 U8x1 bit_offset = (offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>() ; // mod 8
2065 I64x1 byte_offset = (offset_in_bits >> uint64_t(3)).make_signed(); // div 8
2066 if constexpr (IsStore) {
2067 /*----- Store NULL bit depending on its type. -----*/
2068 auto store = [&]<typename T>() {
2069 auto [value, is_null] = env.get<T>(tuple_entry.id).split(); // get value
2070 value.discard(); // handled at entry leaf
2071 Ptr<U8x1> byte_ptr =
2072 (ptr + byte_offset).template to<uint8_t*>(); // compute byte address
2073 setbit<U8x1>(byte_ptr, is_null, uint8_t(1) << bit_offset); // update bit
2074 };
2076 [&](const Boolean&) { store.template operator()<_Boolx1>(); },
2077 [&](const Numeric &n) {
2078 switch (n.kind) {
2079 case Numeric::N_Int:
2080 case Numeric::N_Decimal:
2081 switch (n.size()) {
2082 default: M_unreachable("invalid size");
2083 case 8: store.template operator()<_I8x1 >(); break;
2084 case 16: store.template operator()<_I16x1>(); break;
2085 case 32: store.template operator()<_I32x1>(); break;
2086 case 64: store.template operator()<_I64x1>(); break;
2087 }
2088 break;
2089 case Numeric::N_Float:
2090 if (n.size() <= 32)
2091 store.template operator()<_Floatx1>();
2092 else
2093 store.template operator()<_Doublex1>();
2094 }
2095 },
2096 [&](const CharacterSequence&) {
2097 auto value = env.get<NChar>(tuple_entry.id); // get value
2098 Ptr<U8x1> byte_ptr =
2099 (ptr + byte_offset).template to<uint8_t*>(); // compute byte address
2100 setbit<U8x1>(byte_ptr, value.is_null(), uint8_t(1) << bit_offset); // update bit
2101 },
2102 [&](const Date&) { store.template operator()<_I32x1>(); },
2103 [&](const DateTime&) { store.template operator()<_I64x1>(); },
2104 [](auto&&) { M_unreachable("invalid type"); },
2105 }, *tuple_entry.type);
2106 } else {
2107 /*----- Load NULL bit. -----*/
2108 U8x1 byte = *(ptr + byte_offset).template to<uint8_t*>(); // load the byte
2109 Var<Boolx1> value((byte bitand (uint8_t(1) << bit_offset)).to<bool>()); // mask bit
2110 new (&null_bits[tuple_idx]) Boolx1(value);
2111 /* Address for NULL bits not yet supported. */
2112 }
2113 } else { // entry must not be NULL
2114#ifndef NDEBUG
2115 if constexpr (IsStore) {
2116 /*----- Check that value is also not NULL. -----*/
2117 auto check = [&]<typename T>() {
2118 Wasm_insist(env.get<T>(tuple_entry.id).not_null(),
2119 "value of non-nullable entry must not be nullable");
2120 };
2122 [&](const Boolean&) { check.template operator()<_Boolx1>(); },
2123 [&](const Numeric &n) {
2124 switch (n.kind) {
2125 case Numeric::N_Int:
2126 case Numeric::N_Decimal:
2127 switch (n.size()) {
2128 default: M_unreachable("invalid size");
2129 case 8: check.template operator()<_I8x1 >(); break;
2130 case 16: check.template operator()<_I16x1>(); break;
2131 case 32: check.template operator()<_I32x1>(); break;
2132 case 64: check.template operator()<_I64x1>(); break;
2133 }
2134 break;
2135 case Numeric::N_Float:
2136 if (n.size() <= 32)
2137 check.template operator()<_Floatx1>();
2138 else
2139 check.template operator()<_Doublex1>();
2140 }
2141 },
2142 [&](const CharacterSequence&) { check.template operator()<NChar>(); },
2143 [&](const Date&) { check.template operator()<_I32x1>(); },
2144 [&](const DateTime&) { check.template operator()<_I64x1>(); },
2145 [](auto&&) { M_unreachable("invalid type"); },
2146 }, *tuple_entry.type);
2147 }
2148#endif
2149 }
2150 }
2151 } else { // NULL bitmap without bit stride can benefit from static masking of NULL bits
2152 auto ptr = [&]() -> Ptr<void> {
2153 if (inode_iter and leaf_info.stride_in_bits) {
2154 /* omit `leaf_info.offset_in_bits` here to add it to the static offsets and masks;
2155 * this is valid since no bit stride means that the leaf byte offset computation is
2156 * independent of the static parts */
2157 U64x1 leaf_offset_in_bits = *inode_iter * leaf_info.stride_in_bits;
2158 U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
2159 I64x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed(); // div 8
2160 Wasm_insist(leaf_bit_offset == 0U, "no leaf bit offset without bit stride");
2161 const Var<Ptr<void>> ptr(inode_ptr + leaf_byte_offset);
2162 return ptr;
2163 } else {
2164 return inode_ptr;
2165 }
2166 }(); // pointer to NULL bitmap
2167
2168 /*----- For each tuple entry that can be NULL, create a store/load with offset and mask. --*/
2169 for (std::size_t tuple_idx = 0; tuple_idx != tuple_value_schema.num_entries(); ++tuple_idx) {
2170 auto &tuple_entry = tuple_value_schema[tuple_idx];
2171 const auto &[layout_idx, layout_entry] = layout_schema[tuple_entry.id];
2172 M_insist(*tuple_entry.type == *layout_entry.type);
2173 if (layout_entry.nullable()) { // layout entry may be NULL
2174 const uint8_t static_bit_offset = (leaf_info.offset_in_bits + layout_idx) % 8;
2175 const int64_t static_byte_offset = (leaf_info.offset_in_bits + layout_idx) / 8;
2176 if constexpr (IsStore) {
2177 /*----- Store NULL bit depending on its type. -----*/
2178 auto store = [&]<typename T>() {
2179 auto [value, is_null] = env.get<T>(tuple_entry.id).split(); // get value
2180 value.discard(); // handled at entry leaf
2181 Ptr<U8x1> byte_ptr =
2182 (ptr.clone() + static_byte_offset).template to<uint8_t*>(); // compute byte address
2183 setbit<U8x1>(byte_ptr, is_null, static_bit_offset); // update bit
2184 };
2186 [&](const Boolean&) { store.template operator()<_Boolx1>(); },
2187 [&](const Numeric &n) {
2188 switch (n.kind) {
2189 case Numeric::N_Int:
2190 case Numeric::N_Decimal:
2191 switch (n.size()) {
2192 default: M_unreachable("invalid size");
2193 case 8: store.template operator()<_I8x1 >(); break;
2194 case 16: store.template operator()<_I16x1>(); break;
2195 case 32: store.template operator()<_I32x1>(); break;
2196 case 64: store.template operator()<_I64x1>(); break;
2197 }
2198 break;
2199 case Numeric::N_Float:
2200 if (n.size() <= 32)
2201 store.template operator()<_Floatx1>();
2202 else
2203 store.template operator()<_Doublex1>();
2204 }
2205 },
2206 [&](const CharacterSequence&) {
2207 auto value = env.get<NChar>(tuple_entry.id); // get value
2208 Ptr<U8x1> byte_ptr =
2209 (ptr.clone() + static_byte_offset).template to<uint8_t*>(); // compute byte address
2210 setbit<U8x1>(byte_ptr, value.is_null(), static_bit_offset); // update bit
2211 },
2212 [&](const Date&) { store.template operator()<_I32x1>(); },
2213 [&](const DateTime&) { store.template operator()<_I64x1>(); },
2214 [](auto&&) { M_unreachable("invalid type"); },
2215 }, *tuple_entry.type);
2216 } else {
2217 /*----- Load NULL bit. -----*/
2218 U8x1 byte = *(ptr.clone() + static_byte_offset).template to<uint8_t*>(); // load the byte
2219 const uint8_t static_mask = 1U << static_bit_offset;
2220 Var<Boolx1> value((byte bitand static_mask).to<bool>()); // mask bit
2221 new (&null_bits[tuple_idx]) Boolx1(value);
2222 /* Address for NULL bits not yet supported. */
2223 }
2224 } else { // entry must not be NULL
2225#ifndef NDEBUG
2226 if constexpr (IsStore) {
2227 /*----- Check that value is also not NULL. -----*/
2228 auto check = [&]<typename T>() {
2229 Wasm_insist(env.get<T>(tuple_entry.id).not_null(),
2230 "value of non-nullable entry must not be nullable");
2231 };
2233 [&](const Boolean&) { check.template operator()<_Boolx1>(); },
2234 [&](const Numeric &n) {
2235 switch (n.kind) {
2236 case Numeric::N_Int:
2237 case Numeric::N_Decimal:
2238 switch (n.size()) {
2239 default: M_unreachable("invalid size");
2240 case 8: check.template operator()<_I8x1 >(); break;
2241 case 16: check.template operator()<_I16x1>(); break;
2242 case 32: check.template operator()<_I32x1>(); break;
2243 case 64: check.template operator()<_I64x1>(); break;
2244 }
2245 break;
2246 case Numeric::N_Float:
2247 if (n.size() <= 32)
2248 check.template operator()<_Floatx1>();
2249 else
2250 check.template operator()<_Doublex1>();
2251 }
2252 },
2253 [&](const CharacterSequence&) { check.template operator()<NChar>(); },
2254 [&](const Date&) { check.template operator()<_I32x1>(); },
2255 [&](const DateTime&) { check.template operator()<_I64x1>(); },
2256 [](auto&&) { M_unreachable("invalid type"); },
2257 }, *tuple_entry.type);
2258 }
2259#endif
2260 }
2261 }
2262 ptr.discard(); // since it was always cloned
2263 }
2264 } else { // regular entry
2265 auto &layout_entry = layout_schema[leaf_info.leaf.index()];
2266 M_insist(*layout_entry.type == *leaf_info.leaf.type());
2267 auto tuple_value_it = tuple_value_schema.find(layout_entry.id);
2268 auto tuple_addr_it = tuple_addr_schema.find(layout_entry.id);
2269 if (tuple_value_it == tuple_value_schema.end() and tuple_addr_it == tuple_addr_schema.end())
2270 continue; // entry not contained in both tuple schemas
2271 auto tuple_it = tuple_value_it != tuple_value_schema.end() ? tuple_value_it : tuple_addr_it;
2272 M_insist(*tuple_it->type == *layout_entry.type);
2273 const auto tuple_value_idx = std::distance(tuple_value_schema.begin(), tuple_value_it);
2274 const auto tuple_addr_idx = std::distance(tuple_addr_schema.begin(), tuple_addr_it);
2275
2276 if (bit_stride) { // entry with bit stride requires dynamic masking
2277 M_insist(tuple_it->type->is_boolean(), "leaf bit stride currently only for `Boolean` supported");
2278
2279 M_insist(bool(inode_iter), "stride requires repetition");
2280 U64x1 leaf_offset_in_bits = leaf_info.offset_in_bits + *inode_iter * leaf_info.stride_in_bits;
2281 U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>() ; // mod 8
2282 I64x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed(); // div 8
2283
2284 Ptr<U8x1> byte_ptr = (inode_ptr + leaf_byte_offset).template to<uint8_t*>();
2285 U8x1 mask = uint8_t(1) << leaf_bit_offset;
2286
2287 if constexpr (IsStore) {
2288 /*----- Store value. -----*/
2289 auto [value, is_null] = env.get<_Boolx1>(tuple_it->id).split(); // get value
2290 is_null.discard(); // handled at NULL bitmap leaf
2291 setbit(byte_ptr, value, mask); // update bit
2292 } else {
2293 /*----- Load value. -----*/
2294 /* TODO: load byte once, create values with respective mask */
2295 if (tuple_value_it != tuple_value_schema.end()) {
2296 Var<Boolx1> value((*byte_ptr.clone() bitand mask.clone()).template to<bool>()); // mask bit with dynamic mask
2297 new (&values[tuple_value_idx]) SQL_t(_Boolx1(value));
2298 }
2299 /* Address for booleans not yet supported. */
2300 byte_ptr.discard();
2301 mask.discard();
2302 }
2303 } else { // entry without bit stride; if masking is required, we can use a static mask
2304 auto ptr = [&]() -> Ptr<void> {
2305 if (inode_iter and leaf_info.stride_in_bits) {
2306 /* omit `leaf_info.offset_in_bits` here to use it as static offset and mask;
2307 * this is valid since no bit stride means that the leaf byte offset computation is
2308 * independent of the static parts */
2309 U64x1 leaf_offset_in_bits = *inode_iter * leaf_info.stride_in_bits;
2310 U8x1 leaf_bit_offset = (leaf_offset_in_bits.clone() bitand uint64_t(7)).to<uint8_t>(); // mod 8
2311 I64x1 leaf_byte_offset = (leaf_offset_in_bits >> uint64_t(3)).make_signed(); // div 8
2312 Wasm_insist(leaf_bit_offset == 0U, "no leaf bit offset without bit stride");
2313 return inode_ptr + leaf_byte_offset;
2314 } else {
2315 return inode_ptr;
2316 }
2317 }(); // pointer to entry
2318
2319 const uint8_t static_bit_offset = leaf_info.offset_in_bits % 8;
2320 const int64_t static_byte_offset = leaf_info.offset_in_bits / 8;
2321
2322 /*----- Store value depending on its type. -----*/
2323 auto store = [&]<typename T>() {
2324 using type = typename T::type;
2325 M_insist(static_bit_offset == 0,
2326 "leaf offset of `Numeric`, `Date`, or `DateTime` must be byte aligned");
2327 auto [value, is_null] = env.get<T>(tuple_it->id).split(); // get value
2328 is_null.discard(); // handled at NULL bitmap leaf
2329 *(ptr + static_byte_offset).template to<type*>() = value;
2330 };
2331 /*----- Load value depending on its type. -----*/
2332 auto load = [&]<typename T>() {
2333 using type = typename T::type;
2334 M_insist(static_bit_offset == 0,
2335 "leaf offset of `Numeric`, `Date`, or `DateTime` must be byte aligned");
2336 if (tuple_value_it != tuple_value_schema.end()) {
2337 Var<PrimitiveExpr<type>> value(*(ptr.clone() + static_byte_offset).template to<type*>());
2338 new (&values[tuple_value_idx]) SQL_t(T(value));
2339 }
2340 if (tuple_addr_it != tuple_addr_schema.end())
2341 new (&addrs[tuple_addr_idx]) SQL_addr_t(
2342 (ptr.clone() + static_byte_offset).template to<type*>()
2343 );
2344 ptr.discard();
2345 };
2346 /*----- Select call target (store or load) and visit attribute type. -----*/
2347#define CALL(TYPE) if constexpr (IsStore) store.template operator()<TYPE>(); else load.template operator()<TYPE>()
2349 [&](const Boolean&) {
2350 Ptr<U8x1> byte_ptr = (ptr + static_byte_offset).template to<uint8_t*>();
2351 if constexpr (IsStore) {
2352 /*----- Store value. -----*/
2353 auto [value, is_null] = env.get<_Boolx1>(tuple_it->id).split(); // get value
2354 is_null.discard(); // handled at NULL bitmap leaf
2355 setbit<U8x1>(byte_ptr, value, static_bit_offset); // update bit
2356 } else {
2357 /*----- Load value. -----*/
2358 /* TODO: load byte once, create values with respective mask */
2359 const uint8_t static_mask = 1U << static_bit_offset;
2360
2361 if (tuple_value_it != tuple_value_schema.end()) {
2362 Var<Boolx1> value((*byte_ptr.clone() bitand static_mask).to<bool>()); // mask bit
2363 new (&values[tuple_value_idx]) SQL_t(_Boolx1(value));
2364 }
2365 /* Address for booleans not yet supported. */
2366 byte_ptr.discard();
2367 }
2368 },
2369 [&](const Numeric &n) {
2370 switch (n.kind) {
2371 case Numeric::N_Int:
2372 case Numeric::N_Decimal:
2373 switch (n.size()) {
2374 default: M_unreachable("invalid size");
2375 case 8: CALL(_I8x1 ); break;
2376 case 16: CALL(_I16x1); break;
2377 case 32: CALL(_I32x1); break;
2378 case 64: CALL(_I64x1); break;
2379 }
2380 break;
2381 case Numeric::N_Float:
2382 if (n.size() <= 32)
2383 CALL(_Floatx1);
2384 else
2385 CALL(_Doublex1);
2386 }
2387 },
2388 [&](const CharacterSequence &cs) {
2389 M_insist(static_bit_offset == 0, "leaf offset of `CharacterSequence` must be byte aligned");
2390 Ptr<Charx1> addr = (ptr + static_byte_offset).template to<char*>();
2391 if constexpr (IsStore) {
2392 /*----- Store value. -----*/
2393 auto value = env.get<NChar>(tuple_it->id); // get value
2394 IF (value.clone().not_null()) {
2395 strncpy(addr, value, U32x1(cs.size() / 8)).discard();
2396 };
2397 } else {
2398 /*----- Load value. -----*/
2399 new (&values[tuple_value_idx]) SQL_t(
2400 NChar(addr, layout_entry.nullable(), cs.length, cs.is_varying)
2401 );
2402 /* Omit addresses for character sequences. */
2403 }
2404 },
2405 [&](const Date&) { CALL(_I32x1); },
2406 [&](const DateTime&) { CALL(_I64x1); },
2407 [](auto&&) { M_unreachable("invalid type"); },
2408 }, *tuple_it->type);
2409#undef CALL
2410 }
2411 }
2412 }
2413 });
2414
2415 if constexpr (not IsStore) {
2416 /*----- Combine actual values and possible NULL bits to a new `SQL_t` and add this to the environment. -----*/
2417 for (std::size_t idx = 0; idx != tuple_value_schema.num_entries(); ++idx) {
2418 auto &tuple_entry = tuple_value_schema[idx];
2419 std::visit(overloaded{
2420 [&]<typename T>(Expr<T> value) {
2421 if (has_null_bitmap and layout_schema[tuple_entry.id].second.nullable()) {
2422 Expr<T> combined(value.insist_not_null(), null_bits[idx]);
2423 env.add(tuple_entry.id, combined);
2424 } else {
2425 env.add(tuple_entry.id, value);
2426 }
2427 },
2428 [&](NChar value) {
2429 if (has_null_bitmap and layout_schema[tuple_entry.id].second.nullable()) {
2430 /* introduce variable s.t. uses only load from it */
2431 Var<Ptr<Charx1>> combined(Select(null_bits[idx], Ptr<Charx1>::Nullptr(), value.val()));
2432 env.add(tuple_entry.id, NChar(combined, /* can_be_null=*/ true, value.length(),
2433 value.guarantees_terminating_nul()));
2434 } else {
2435 Var<Ptr<Charx1>> _value(value.val()); // introduce variable s.t. uses only load from it
2436 env.add(tuple_entry.id, NChar(_value, /* can_be_null=*/ false, value.length(),
2437 value.guarantees_terminating_nul()));
2438 }
2439 },
2440 [](auto) { M_unreachable("SIMDfication currently not supported"); },
2441 [](std::monostate) { M_unreachable("value must be loaded beforehand"); },
2442 }, values[idx]);
2443 }
2444
2445 /*----- Add addresses to the environment. -----*/
2446 for (std::size_t idx = 0; idx != tuple_addr_schema.num_entries(); ++idx) {
2447 auto &tuple_entry = tuple_addr_schema[idx];
2448 env.add_addr(tuple_entry.id, std::move(addrs[idx]));
2449 }
2450 }
2451
2452 /*----- Destroy created values and addresses. -----*/
2453 for (std::size_t idx = 0; idx < tuple_value_schema.num_entries(); ++idx)
2454 values[idx].~SQL_t();
2455 for (std::size_t idx = 0; idx < tuple_addr_schema.num_entries(); ++idx)
2456 addrs[idx].~SQL_addr_t();
2457 if constexpr (not IsStore) {
2458 /*----- Destroy created NULL bits. -----*/
2459 for (std::size_t idx = 0; idx != tuple_value_schema.num_entries(); ++idx) {
2460 if (has_null_bitmap and layout_schema[tuple_value_schema[idx].id].second.nullable())
2461 null_bits[idx].~Boolx1();
2462 }
2463 }
2464 base_address.discard(); // discard base address (as it was always cloned)
2465}
2466
2467}
2468
2469}
2470
2471void m::wasm::compile_store_point_access(const Schema &tuple_value_schema, const Schema &tuple_addr_schema,
2472 Ptr<void> base_address, const DataLayout &layout, const Schema &layout_schema,
2473 U64x1 tuple_id)
2474{
2475 return compile_data_layout_point_access<true>(tuple_value_schema, tuple_addr_schema, base_address, layout,
2476 layout_schema, tuple_id);
2477}
2478
2479void m::wasm::compile_load_point_access(const Schema &tuple_value_schema, const Schema &tuple_addr_schema,
2480 Ptr<void> base_address, const DataLayout &layout, const Schema &layout_schema,
2481 U64x1 tuple_id)
2482{
2483 return compile_data_layout_point_access<false>(tuple_value_schema, tuple_addr_schema, base_address, layout,
2484 layout_schema, tuple_id);
2485}
2486
2487
2488/*======================================================================================================================
2489 * Buffer
2490 *====================================================================================================================*/
2491
2492template<bool IsGlobal>
2493Buffer<IsGlobal>::Buffer(const Schema &schema, const DataLayoutFactory &factory, bool load_simdfied,
2494 std::size_t num_tuples, setup_t setup, pipeline_t pipeline, teardown_t teardown)
2495 : schema_(std::cref(schema))
2496 , layout_(factory.make(schema, num_tuples))
2497 , load_simdfied_(load_simdfied)
2498 , setup_(std::move(setup))
2499 , pipeline_(std::move(pipeline))
2500 , teardown_(std::move(teardown))
2501{
2502 M_insist(schema.num_entries() != 0, "buffer schema must not be empty");
2503
2504 if constexpr (IsGlobal) {
2505 if (layout_.is_finite()) {
2506 /*----- Pre-allocate memory for entire buffer. Use maximal possible alignment requirement of 8 bytes. ----*/
2507 const uint64_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2508 const uint64_t num_children =
2510 storage_.base_address_ =
2511 Module::Allocator().pre_allocate(num_children * child_size_in_bytes, /* alignment= */ 8);
2512 } else {
2513 storage_.capacity_.emplace(); // create global for capacity
2514 }
2515 }
2516}
2517
2518template<bool IsGlobal>
2520{
2521 if constexpr (IsGlobal) { // free memory of global buffer when object is destroyed and no use may occur later
2522 if (not layout_.is_finite()) {
2523 /*----- Deallocate memory for buffer. -----*/
2524 M_insist(bool(storage_.capacity_));
2525 const uint64_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2526 auto buffer_size_in_bytes =
2527 (*storage_.capacity_ / uint64_t(layout_.child().num_tuples())) * child_size_in_bytes;
2528 Module::Allocator().deallocate(storage_.base_address_, buffer_size_in_bytes);
2529 }
2530 }
2531}
2532
2533template<bool IsGlobal>
2535 param_t _tuple_addr_schema) const
2536{
2537#ifndef NDEBUG
2538 if (_tuple_value_schema) {
2539 for (auto &e : _tuple_value_schema->get())
2540 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple value schema entry not found");
2541 }
2542 if (_tuple_addr_schema) {
2543 for (auto &e : _tuple_addr_schema->get())
2544 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple address schema entry not found");
2545 }
2546#endif
2547
2548 static Schema empty_schema;
2549 const auto &tuple_value_schema = _tuple_value_schema ? _tuple_value_schema->get() : schema_.get();
2550 const auto &tuple_addr_schema = _tuple_addr_schema ? _tuple_addr_schema->get() : empty_schema;
2551
2552 return buffer_load_proxy_t(*this, tuple_value_schema, tuple_addr_schema);
2553}
2554
2555template<bool IsGlobal>
2557{
2558#ifndef NDEBUG
2559 if (tuple_schema) {
2560 for (auto &e : tuple_schema->get())
2561 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple schema entry not found");
2562 }
2563#endif
2564
2565 return tuple_schema ? buffer_store_proxy_t(*this, *tuple_schema) : buffer_store_proxy_t(*this, schema_);
2566}
2567
2568template<bool IsGlobal>
2570{
2571#ifndef NDEBUG
2572 if (tuple_schema) {
2573 for (auto &e : tuple_schema->get())
2574 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple schema entry not found");
2575 }
2576#endif
2577
2578 return tuple_schema ? buffer_swap_proxy_t(*this, *tuple_schema) : buffer_swap_proxy_t(*this, schema_);
2579}
2580
2581template<bool IsGlobal>
2583{
2584 M_insist(not base_address_, "must not call `setup()` twice");
2585 M_insist(not size_, "must not call `setup()` twice");
2586 M_insist(not capacity_, "must not call `setup()` twice");
2587 M_insist(not first_iteration_, "must not call `setup()` twice");
2588
2589 /*----- Create local variables. -----*/
2590 base_address_.emplace();
2591 size_.emplace();
2592 if (not layout_.is_finite()) {
2593 capacity_.emplace();
2594 first_iteration_.emplace(true); // set to true
2595 }
2596
2597 /*----- For global buffers, read values from global backups into local variables. -----*/
2598 if constexpr (IsGlobal) {
2599 /* omit assigning base address here as it will always be set below */
2600 *size_ = storage_.size_;
2601 if (not layout_.is_finite()) {
2602 M_insist(bool(storage_.capacity_));
2603 *capacity_ = *storage_.capacity_;
2604 }
2605 }
2606
2607 if (layout_.is_finite()) {
2608 if constexpr (IsGlobal) {
2609 *base_address_ = storage_.base_address_; // buffer always already pre-allocated
2610 } else {
2611 /*----- Pre-allocate memory for entire buffer. Use maximal possible alignment requirement of 8 bytes. ----*/
2612 const uint64_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2613 const uint64_t num_children =
2614 (layout_.num_tuples() + layout_.child().num_tuples() - 1) / layout_.child().num_tuples();
2615 *base_address_ = Module::Allocator().pre_allocate(num_children * child_size_in_bytes, /* alignment= */ 8);
2616 }
2617 } else {
2618 if constexpr (IsGlobal) {
2619 IF (*capacity_ == 0U) { // buffer not yet allocated
2620 /*----- Set initial capacity. -----*/
2621 *capacity_ = uint64_t(layout_.child().num_tuples());
2622
2623 /*----- Allocate memory for one child instance. Use max. possible alignment requirement of 8 bytes. --*/
2624 const uint64_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2625 *base_address_ = Module::Allocator().allocate(child_size_in_bytes, /* alignment= */ 8);
2626 } ELSE {
2627 *base_address_ = storage_.base_address_;
2628 };
2629 } else {
2630 /*----- Set initial capacity. -----*/
2631 *capacity_ = uint64_t(layout_.child().num_tuples());
2632
2633 /*----- Allocate memory for one child instance. Use max. possible alignment requirement of 8 bytes. -----*/
2634 const uint64_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2635 *base_address_ = Module::Allocator().allocate(child_size_in_bytes, /* alignment= */ 8);
2636 }
2637 }
2638}
2639
2640template<bool IsGlobal>
2642{
2643 M_insist(bool(base_address_), "must call `setup()` before");
2644 M_insist(bool(size_), "must call `setup()` before");
2645 M_insist(not layout_.is_finite() == bool(capacity_), "must call `setup()` before");
2646 M_insist(not layout_.is_finite() == bool(first_iteration_), "must call `setup()` before");
2647
2648 if constexpr (not IsGlobal) { // free memory of local buffer when user calls teardown method
2649 if (not layout_.is_finite()) {
2650 /*----- Deallocate memory for buffer. -----*/
2651 const uint64_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2652 auto buffer_size_in_bytes = (*capacity_ / uint64_t(layout_.child().num_tuples())) * child_size_in_bytes;
2653 Module::Allocator().deallocate(*base_address_, buffer_size_in_bytes);
2654 }
2655 }
2656
2657 /*----- For global buffers, write values from local variables into global backups. -----*/
2658 if constexpr (IsGlobal) {
2659 storage_.base_address_ = *base_address_;
2660 storage_.size_ = *size_;
2661 if (not layout_.is_finite()) {
2662 M_insist(bool(storage_.capacity_));
2663 *storage_.capacity_ = *capacity_;
2664 }
2665 }
2666
2667 /*----- Destroy local variables. -----*/
2668 base_address_.reset();
2669 size_.reset();
2670 if (not layout_.is_finite()) {
2671 capacity_.reset();
2672 first_iteration_->val().discard(); // artificial use to silence diagnostics if `consume()` is not called
2673 first_iteration_.reset();
2674 }
2675}
2676
2677template<bool IsGlobal>
2678void Buffer<IsGlobal>::resume_pipeline(param_t _tuple_value_schema, param_t _tuple_addr_schema) const
2679{
2680 if (not pipeline_)
2681 return;
2682
2683 static Schema empty_schema;
2684 const auto &tuple_value_schema = _tuple_value_schema ? _tuple_value_schema->get() : schema_.get();
2685 const auto &tuple_addr_schema = _tuple_addr_schema ? _tuple_addr_schema->get() : empty_schema;
2686
2687#ifndef NDEBUG
2688 for (auto &e : tuple_value_schema)
2689 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple value schema entry not found");
2690 for (auto &e : tuple_addr_schema)
2691 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple addr schema entry not found");
2692#endif
2693
2694 /*----- Create function on-demand to assert that all needed identifiers are already created. -----*/
2695 if (not resume_pipeline_) {
2696 /*----- Create function to resume the pipeline for each tuple contained in the buffer. -----*/
2697 FUNCTION(resume_pipeline, void(void*, uint64_t))
2698 {
2699 auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function
2700
2701 /*----- Access base address and size parameters. -----*/
2702 Ptr<void> base_address = PARAMETER(0);
2703 U64x1 size = PARAMETER(1);
2704
2705 /*----- Compute poss. number of SIMD lanes and decide which to use with regard to other ops. preferences. */
2706 const auto num_simd_lanes_preferred =
2707 CodeGenContext::Get().num_simd_lanes_preferred(); // get other operators preferences
2708 const std::size_t num_simd_lanes =
2709 load_simdfied_ ? std::max<std::size_t>({ num_simd_lanes_preferred,
2710 get_num_simd_lanes(layout_, schema_, tuple_value_schema),
2711 tuple_addr_schema.empty() ? 0UL : 2UL }) // 64-bit pointers and 128-bit SIMD vectors
2712 : 1;
2714
2715 /*----- Emit setup code *before* compiling data layout to not overwrite its temporary boolean variables. -*/
2716 setup_();
2717
2718 Var<U64x1> load_tuple_id; // default initialized to 0
2719
2720 if (tuple_value_schema.num_entries() == 0 and tuple_addr_schema.num_entries() == 0) {
2721 /*----- If no attributes must be loaded, generate a loop just executing the pipeline `size`-times. -----*/
2722 WHILE (load_tuple_id < size) {
2723 load_tuple_id += uint64_t(num_simd_lanes);
2724 pipeline_();
2725 }
2726 base_address.discard(); // since it is not needed
2727 } else {
2728 /*----- Compile data layout to generate sequential load from buffer. -----*/
2729 auto [load_inits, loads, load_jumps] =
2730 compile_load_sequential(tuple_value_schema, tuple_addr_schema, base_address, layout_,
2731 num_simd_lanes, schema_, load_tuple_id);
2732
2733 /*----- Generate loop for loading entire buffer, with the pipeline emitted into the loop body. -----*/
2734 load_inits.attach_to_current();
2735 WHILE (load_tuple_id < size) {
2736 loads.attach_to_current();
2737 pipeline_();
2738 load_jumps.attach_to_current();
2739 }
2740 }
2741
2742 /*----- Emit teardown code. -----*/
2743 teardown_();
2744 }
2745 resume_pipeline_ = std::move(resume_pipeline);
2746 }
2747
2748 /*----- Call created function. -----*/
2749 M_insist(bool(resume_pipeline_));
2750 (*resume_pipeline_)(base_address(), size()); // base address and size as arguments
2751}
2752
2753template<bool IsGlobal>
2754void Buffer<IsGlobal>::resume_pipeline_inline(param_t tuple_value_schema, param_t tuple_addr_schema) const
2755{
2756 execute_pipeline_inline(setup_, pipeline_, teardown_, std::move(tuple_value_schema), std::move(tuple_addr_schema));
2757}
2758
2759template<bool IsGlobal>
2761 param_t _tuple_value_schema, param_t _tuple_addr_schema) const
2762{
2763 if (not pipeline)
2764 return;
2765
2766 static Schema empty_schema;
2767 const auto &tuple_value_schema = _tuple_value_schema ? _tuple_value_schema->get() : schema_.get();
2768 const auto &tuple_addr_schema = _tuple_addr_schema ? _tuple_addr_schema->get() : empty_schema;
2769
2770#ifndef NDEBUG
2771 for (auto &e : tuple_value_schema)
2772 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple value schema entry not found");
2773 for (auto &e : tuple_addr_schema)
2774 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple addr schema entry not found");
2775#endif
2776
2777 /*----- Create function to resume the pipeline for each tuple contained in the buffer. -----*/
2778 FUNCTION(resume_pipeline, void(void*, uint64_t))
2779 {
2780 auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function
2781
2782 /*----- Access base address and size parameters. -----*/
2783 Ptr<void> base_address = PARAMETER(0);
2784 U64x1 size = PARAMETER(1);
2785
2786 /*----- Compute poss. number of SIMD lanes and decide which to use with regard to other ops. preferences. */
2787 const auto num_simd_lanes_preferred =
2788 CodeGenContext::Get().num_simd_lanes_preferred(); // get other operators preferences
2789 const std::size_t num_simd_lanes =
2790 load_simdfied_ ? std::max<std::size_t>({ num_simd_lanes_preferred,
2791 get_num_simd_lanes(layout_, schema_, tuple_value_schema),
2792 tuple_addr_schema.empty() ? 0UL : 2UL }) // 64-bit pointers and 128-bit SIMD vectors
2793 : 1;
2795
2796 /*----- Emit setup code *before* compiling data layout to not overwrite its temporary boolean variables. -*/
2797 setup();
2798
2799 Var<U64x1> load_tuple_id; // default initialized to 0
2800
2801 if (tuple_value_schema.num_entries() == 0 and tuple_addr_schema.num_entries() == 0) {
2802 /*----- If no attributes must be loaded, generate a loop just executing the pipeline `size`-times. -----*/
2803 WHILE (load_tuple_id < size) {
2804 load_tuple_id += uint64_t(num_simd_lanes);
2805 pipeline();
2806 }
2807 base_address.discard(); // since it is not needed
2808 } else {
2809 /*----- Compile data layout to generate sequential load from buffer. -----*/
2810 auto [load_inits, loads, load_jumps] =
2811 compile_load_sequential(tuple_value_schema, tuple_addr_schema, base_address, layout_, num_simd_lanes,
2812 schema_, load_tuple_id);
2813
2814 /*----- Generate loop for loading entire buffer, with the pipeline emitted into the loop body. -----*/
2815 load_inits.attach_to_current();
2816 WHILE (load_tuple_id < size) {
2817 loads.attach_to_current();
2818 pipeline();
2819 load_jumps.attach_to_current();
2820 }
2821 }
2822
2823 /*----- Emit teardown code. -----*/
2824 teardown();
2825 }
2826
2827 /*----- Call created function. -----*/
2828 resume_pipeline(base_address(), size()); // base address and size as arguments
2829}
2830
2831template<bool IsGlobal>
2833 param_t _tuple_value_schema, param_t _tuple_addr_schema) const
2834{
2835 if (not pipeline)
2836 return;
2837
2838 static Schema empty_schema;
2839 const auto &tuple_value_schema = _tuple_value_schema ? _tuple_value_schema->get() : schema_.get();
2840 const auto &tuple_addr_schema = _tuple_addr_schema ? _tuple_addr_schema->get() : empty_schema;
2841
2842#ifndef NDEBUG
2843 for (auto &e : tuple_value_schema)
2844 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple value schema entry not found");
2845 for (auto &e : tuple_addr_schema)
2846 M_insist(schema_.get().find(e.id) != schema_.get().cend(), "tuple addr schema entry not found");
2847#endif
2848
2849 /*----- Access base address and size depending on whether they are globals or locals. -----*/
2850 Ptr<void> base_address =
2851 M_CONSTEXPR_COND(IsGlobal,
2852 base_address_ ? base_address_->val() : Var<Ptr<void>>(storage_.base_address_.val()).val(),
2853 ({ M_insist(bool(base_address_)); base_address_->val(); }));
2854 U64x1 size =
2855 M_CONSTEXPR_COND(IsGlobal,
2856 size_ ? size_->val() : Var<U64x1>(storage_.size_.val()).val(),
2857 ({ M_insist(bool(size_)); size_->val(); }));
2858
2859 /*----- If predication is used, compute number of tuples to load from buffer depending on predicate. -----*/
2860 std::optional<Var<Boolx1>> pred; // use variable since WHILE loop will clone it (for IF and DO_WHILE)
2861 if (auto &env = CodeGenContext::Get().env(); env.predicated()) {
2862 M_insist(CodeGenContext::Get().num_simd_lanes() == 1, "invalid number of SIMD lanes");
2863 pred = env.extract_predicate<_Boolx1>().is_true_and_not_null();
2864 }
2865 U64x1 num_tuples = pred ? Select(*pred, size, 0U) : size;
2866
2867 /*----- Compute possible number of SIMD lanes and decide which to use with regard to other operators preferences. */
2868 const auto num_simd_lanes_preferred =
2869 CodeGenContext::Get().num_simd_lanes_preferred(); // get other operators preferences
2870 const std::size_t num_simd_lanes =
2871 load_simdfied_ ? std::max<std::size_t>({ num_simd_lanes_preferred,
2872 get_num_simd_lanes(layout_, schema_, tuple_value_schema),
2873 tuple_addr_schema.empty() ? 0UL : 2UL }) // 64-bit pointers and 128-bit SIMD vectors
2874 : 1;
2876
2877 /*----- Emit setup code *before* compiling data layout to not overwrite its temporary boolean variables. -----*/
2878 setup();
2879
2880 Var<U64x1> load_tuple_id(0); // explicitly (re-)set tuple ID to 0
2881
2882 if (tuple_value_schema.num_entries() == 0 and tuple_addr_schema.num_entries() == 0) {
2883 /*----- If no attributes must be loaded, generate a loop just executing the pipeline `size`-times. -----*/
2884 WHILE (load_tuple_id < num_tuples) {
2885 load_tuple_id += uint64_t(num_simd_lanes);
2886 pipeline();
2887 }
2888 base_address.discard(); // since it is not needed
2889 } else {
2890 /*----- Compile data layout to generate sequential load from buffer. -----*/
2891 auto [load_inits, loads, load_jumps] =
2892 compile_load_sequential(tuple_value_schema, tuple_addr_schema, base_address, layout_,
2893 num_simd_lanes, schema_, load_tuple_id);
2894
2895 /*----- Generate loop for loading entire buffer, with the pipeline emitted into the loop body. -----*/
2896 load_inits.attach_to_current();
2897 WHILE (load_tuple_id < num_tuples) {
2898 loads.attach_to_current();
2899 pipeline();
2900 load_jumps.attach_to_current();
2901 }
2902 }
2903
2904 /*----- Emit teardown code. -----*/
2905 teardown();
2906}
2907
2908template<bool IsGlobal>
2910{
2911 M_insist(bool(base_address_), "must call `setup()` before");
2912 M_insist(bool(size_), "must call `setup()` before");
2913 M_insist(not layout_.is_finite() == bool(capacity_), "must call `setup()` before");
2914 M_insist(not layout_.is_finite() == bool(first_iteration_), "must call `setup()` before");
2915
2916 /*----- Compile data layout to generate sequential single-pass store into the buffer. -----*/
2917 /* We are able to use a single-pass store, i.e. *local* pointers and masks, since we explicitly save the needed
2918 * variables, i.e. base address and size, using *global* backups and restore them before performing the actual
2919 * store in the case of global buffers. For local buffers, stores must be done in a single pass anyway. */
2920 static Schema empty_schema;
2921 auto [_store_inits, stores, _store_jumps] =
2922 compile_store_sequential_single_pass(schema_, empty_schema, *base_address_, layout_,
2923 CodeGenContext::Get().num_simd_lanes(), schema_, *size_);
2924 Block store_inits(std::move(_store_inits)), store_jumps(std::move(_store_jumps));
2925
2926 if (layout_.is_finite()) {
2927 IF (*size_ == 0U) { // buffer empty
2928 /*----- Emit initialization code for storing (i.e. (re-)set to first buffer slot). -----*/
2929 store_inits.attach_to_current();
2930 };
2931 } else {
2932 IF (*size_ == *capacity_) { // buffer full
2933 /*----- Resize buffer by doubling its capacity. -----*/
2934 const uint64_t child_size_in_bytes = (layout_.stride_in_bits() + 7) / 8;
2935 auto buffer_size_in_bytes = (*capacity_ / uint64_t(layout_.child().num_tuples())) * child_size_in_bytes;
2936 auto ptr = Module::Allocator().allocate(buffer_size_in_bytes.clone());
2937 Wasm_insist(ptr == *base_address_ + buffer_size_in_bytes.make_signed(),
2938 "buffer could not be resized sequentially in memory");
2939 *capacity_ *= 2U;
2940 };
2941
2942 IF (*first_iteration_) {
2943 /*----- Emit initialization code for storing (i.e. set to current buffer slot). -----*/
2944 store_inits.attach_to_current();
2945
2946 *first_iteration_ = false;
2947 };
2948 }
2949
2950 /*----- Emit storing code. -----*/
2951 stores.attach_to_current();
2952
2953 if (layout_.is_finite()) {
2954 IF (*size_ == uint64_t(layout_.num_tuples() - CodeGenContext::Get().num_simd_lanes())) { // buffer full
2955 /*----- Resume pipeline for each tuple in buffer and reset size of buffer to 0. -----*/
2956 *size_ = uint64_t(layout_.num_tuples()); // increment size of buffer to resume pipeline even for last tuple
2957 resume_pipeline();
2958 *size_ = 0U;
2959 } ELSE { // buffer not full
2960 /*----- Emit advancing code to next buffer slot and increment size of buffer. -----*/
2961 store_jumps.attach_to_current();
2962 };
2963 } else {
2964 /*----- Emit advancing code to next buffer slot and increment size of buffer. -----*/
2965 store_jumps.attach_to_current();
2966 }
2967}
2968
2969// explicit instantiations to prevent linker errors
2970template struct m::wasm::Buffer<false>;
2971template struct m::wasm::Buffer<true>;
2972
2973
2974/*======================================================================================================================
2975 * buffer accesses
2976 *====================================================================================================================*/
2977
2978template<bool IsGlobal>
2979void buffer_swap_proxy_t<IsGlobal>::operator()(U64x1 first, U64x1 second)
2980{
2981 /*----- Create load proxy. -----*/
2982 auto load = buffer_.get().create_load_proxy(schema_.get());
2983
2984 /*----- Load first tuple into fresh environment. -----*/
2985 auto env_first = [&](){
2987 load(first.clone());
2988 return S.extract();
2989 }();
2990
2991 operator()(first, second, env_first);
2992}
2993
2994template<bool IsGlobal>
2995void buffer_swap_proxy_t<IsGlobal>::operator()(U64x1 first, U64x1 second, const Environment &env_first)
2996{
2997 /*----- Create load and store proxies. -----*/
2998 auto load = buffer_.get().create_load_proxy(schema_.get());
2999 auto store = buffer_.get().create_store_proxy(schema_.get());
3000
3001 /*----- Temporarily save first tuple by creating variable or separate string buffer. -----*/
3002 Environment _env_first;
3003 for (auto &e : schema_.get()) {
3004 std::visit(overloaded {
3005 [&](NChar value) -> void {
3006 Var<Ptr<Charx1>> ptr; // always set here
3007 IF (value.clone().is_null()) {
3008 ptr = Ptr<Charx1>::Nullptr();
3009 } ELSE {
3010 ptr = Module::Allocator().pre_malloc<char>(value.size_in_bytes());
3011 strncpy(ptr, value, U32x1(value.size_in_bytes())).discard();
3012 };
3013 _env_first.add(e.id, NChar(ptr, value.can_be_null(), value.length(), value.guarantees_terminating_nul()));
3014 },
3015 [&]<typename T>(Expr<T> value) -> void {
3016 if (value.can_be_null()) {
3017 Var<Expr<T>> var(value);
3018 _env_first.add(e.id, var);
3019 } else {
3020 Var<PrimitiveExpr<T>> var(value.insist_not_null());
3021 _env_first.add(e.id, Expr<T>(var));
3022 }
3023 },
3024 [](auto) -> void { M_unreachable("SIMDfication currently not supported"); },
3025 [](std::monostate) -> void { M_unreachable("value must be loaded beforehand"); },
3026 }, env_first.get(e.id));
3027 }
3028
3029 /*----- Load second tuple in scoped environment and store it directly at first tuples address. -----*/
3030 {
3032 load(second.clone());
3033 store(first);
3034 }
3035
3036 /*----- Store temporarily saved first tuple at second tuples address. ----*/
3037 {
3038 auto S = CodeGenContext::Get().scoped_environment(std::move(_env_first));
3039 store(second);
3040 }
3041}
3042
3043template<bool IsGlobal>
3044void buffer_swap_proxy_t<IsGlobal>::operator()(U64x1 first, U64x1 second, const Environment &env_first,
3045 const Environment &env_second)
3046{
3047 /*----- Create store proxy. -----*/
3048 auto store = buffer_.get().create_store_proxy(schema_.get());
3049
3050 /*----- Temporarily save first tuple by creating variable or separate string buffer. -----*/
3051 Environment _env_first;
3052 for (auto &e : schema_.get()) {
3053 std::visit(overloaded {
3054 [&](NChar value) -> void {
3055 Var<Ptr<Charx1>> ptr; // always set here
3056 IF (value.clone().is_null()) {
3057 ptr = Ptr<Charx1>::Nullptr();
3058 } ELSE {
3059 ptr = Module::Allocator().pre_malloc<char>(value.size_in_bytes());
3060 strncpy(ptr, value, U32x1(value.size_in_bytes())).discard();
3061 };
3062 _env_first.add(e.id, NChar(ptr, value.can_be_null(), value.length(), value.guarantees_terminating_nul()));
3063 },
3064 [&]<typename T>(Expr<T> value) -> void {
3065 if (value.can_be_null()) {
3066 Var<Expr<T>> var(value);
3067 _env_first.add(e.id, var);
3068 } else {
3069 Var<PrimitiveExpr<T>> var(value.insist_not_null());
3070 _env_first.add(e.id, Expr<T>(var));
3071 }
3072 },
3073 [](auto) -> void { M_unreachable("SIMDfication currently not supported"); },
3074 [](std::monostate) -> void { M_unreachable("value must be loaded beforehand"); }
3075 }, env_first.get(e.id));
3076 }
3077
3078 /*----- Store already loaded second tuple directly at first tuples address. -----*/
3079 {
3081 CodeGenContext::Get().env().add(env_second);
3082 store(first);
3083 }
3084
3085 /*----- Store temporarily saved first tuple at second tuples address. ----*/
3086 {
3087 auto S = CodeGenContext::Get().scoped_environment(std::move(_env_first));
3088 store(second);
3089 }
3090}
3091
3092// explicit instantiations to prevent linker errors
3095
3096
3097/*======================================================================================================================
3098 * string comparison
3099 *====================================================================================================================*/
3100
3101_I32x1 m::wasm::strncmp(NChar _left, NChar _right, U32x1 len, bool reverse)
3102{
3103 static thread_local struct {} _; // unique caller handle
3104 struct data_t : GarbageCollectedData
3105 {
3106 public:
3107 using fn_t = int32_t(uint32_t, uint32_t, char*, char*, uint32_t);
3108 std::optional<FunctionProxy<fn_t>> strncmp_terminating_nul;
3109 std::optional<FunctionProxy<fn_t>> strncmp_no_terminating_nul;
3110
3111 data_t(GarbageCollectedData &&d) : GarbageCollectedData(std::move(d)) { }
3112 };
3113 auto &d = Module::Get().add_garbage_collected_data<data_t>(&_); // garbage collect the `data_t` instance
3114
3115 auto strncmp_non_null = [&d, &_left, &_right, &reverse](Ptr<Charx1> left, Ptr<Charx1> right, U32x1 len) -> I32x1 {
3116 Wasm_insist(left.clone().not_null(), "left operand must not be NULL");
3117 Wasm_insist(right.clone().not_null(), "right operand must not be NULL");
3118 Wasm_insist(len.clone() != 0U, "length to compare must not be 0");
3119
3120 if (_left.length() == 1 and _right.length() == 1) {
3121 /*----- Special handling of single char strings. -----*/
3122 len.discard();
3123 auto left_gt_right = *left.clone() > *right.clone();
3124 return left_gt_right.to<int32_t>() - (*left < *right).to<int32_t>();
3125 } else {
3126 if (_left.guarantees_terminating_nul() and _right.guarantees_terminating_nul() and not reverse) { // reverse needs in-bounds checks
3127 if (not d.strncmp_terminating_nul) {
3128 /*----- Create function to compute the result for non-nullptr arguments character-wise. -----*/
3129 FUNCTION(strncmp_terminating_nul, data_t::fn_t)
3130 {
3131 auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function
3132
3133 const auto len_ty_left = PARAMETER(0);
3134 const auto len_ty_right = PARAMETER(1);
3135 auto left = PARAMETER(2);
3136 auto right = PARAMETER(3);
3137 const auto len = PARAMETER(4);
3138
3139 Var<I32x1> result; // always set here
3140
3141 I32x1 len_left = Select(len < len_ty_left, len, len_ty_left) .make_signed();
3142 I32x1 len_right = Select(len < len_ty_right, len, len_ty_right).make_signed();
3143 Var<Ptr<Charx1>> end_left (left + len_left);
3144 Var<Ptr<Charx1>> end_right(right + len_right);
3145
3146 LOOP() {
3147 /* Check whether one side is shorter than the other. */
3148 result = (left != end_left).to<int32_t>() - (right != end_right).to<int32_t>();
3149 BREAK(result != 0 or left == end_left); // at the end of either or both strings
3150
3151 /* Compare by current character. Loading is valid since we have not seen the terminating
3152 * NUL byte yet. */
3153 result = (*left > *right).to<int32_t>() - (*left < *right).to<int32_t>();
3154 BREAK(result != 0); // found first position where strings differ
3155 BREAK(*left == 0); // reached end of identical strings
3156
3157 /* Advance to next character. */
3158 left += 1;
3159 right += 1;
3160 CONTINUE();
3161 }
3162
3163 RETURN(result);
3164 }
3165 d.strncmp_terminating_nul = std::move(strncmp_terminating_nul);
3166 }
3167
3168 /*----- Call strncmp_terminating_nul function. ------*/
3169 M_insist(bool(d.strncmp_terminating_nul));
3170 return (*d.strncmp_terminating_nul)(_left.length(), _right.length(), left, right, len);
3171 } else {
3172 if (not d.strncmp_no_terminating_nul) {
3173 /*----- Create function to compute the result for non-nullptr arguments character-wise. -----*/
3174 FUNCTION(strncmp_no_terminating_nul, data_t::fn_t)
3175 {
3176 auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function
3177
3178 const auto len_ty_left = PARAMETER(0);
3179 const auto len_ty_right = PARAMETER(1);
3180 Var<Ptr<Charx1>> left(PARAMETER(2));
3181 Var<Ptr<Charx1>> right(PARAMETER(3));
3182 const auto len = PARAMETER(4);
3183
3184 Var<I32x1> result; // always set here
3185
3186 I32x1 len_left = Select(len < len_ty_left, len, len_ty_left) .make_signed();
3187 I32x1 len_right = Select(len < len_ty_right, len, len_ty_right).make_signed();
3188 Var<Ptr<Charx1>> end_left, end_right;
3189
3190 if (not reverse) {
3191 /* Set end variables according to theoretical length. */
3192 end_left = left + len_left;
3193 end_right = right + len_right;
3194 } else {
3195 /* Set end variables to first found NUL byte without exceeding the theoretical length. */
3196 end_left = left;
3197 WHILE(*end_left != 0 and end_left != left + len_left) {
3198 end_left += 1;
3199 }
3200 end_right = right;
3201 WHILE(*end_right != 0 and end_right != right + len_right) {
3202 end_right += 1;
3203 }
3204
3205 /* Swap variable for current position with the one for end position to iterate reversed. */
3206 swap(left, end_left);
3207 swap(right, end_right);
3208
3209 /* Resolve off-by-one errors created by swapping variables. */
3210 left -= 1;
3211 right -= 1;
3212 end_left -= 1;
3213 end_right -= 1;
3214 }
3215
3216 LOOP() {
3217 /* Check whether one side is shorter than the other. Load next character with in-bounds
3218 * checks since the strings may not be NUL byte terminated. */
3219 Var<Charx1> val_left, val_right;
3220 IF (left != end_left) {
3221 val_left = *left;
3222 } ELSE {
3223 val_left = '\0';
3224 };
3225 IF (right != end_right) {
3226 val_right = *right;
3227 } ELSE {
3228 val_right = '\0';
3229 };
3230
3231 /* Compare by current character. */
3232 result = (val_left > val_right).to<int32_t>() - (val_left < val_right).to<int32_t>();
3233 BREAK(result != 0); // found first position where strings differ
3234 BREAK(val_left == 0); // reached end of identical strings
3235
3236 /* Advance to next character. */
3237 left += reverse ? -1 : 1;
3238 right += reverse ? -1 : 1;
3239 CONTINUE();
3240 }
3241
3242 RETURN(result);
3243 }
3244 d.strncmp_no_terminating_nul = std::move(strncmp_no_terminating_nul);
3245 }
3246
3247 /*----- Call strncmp_no_terminating_nul function. ------*/
3248 M_insist(bool(d.strncmp_no_terminating_nul));
3249 return (*d.strncmp_no_terminating_nul)(_left.length(), _right.length(), left, right, len);
3250 }
3251 }
3252 };
3253
3254 const Var<Ptr<Charx1>> left(_left.val()), right(_right.val());
3255 if (_left.can_be_null() or _right.can_be_null()) {
3256 _Var<I32x1> result; // always set here
3257 IF (left.is_null() or right.is_null()) {
3258 result = _I32x1::Null();
3259 } ELSE {
3260 result = strncmp_non_null(left, right, len);
3261 };
3262 return result;
3263 } else {
3264 const Var<I32x1> result(strncmp_non_null(left, right, len)); // to prevent duplicated computation due to `clone()`
3265 return _I32x1(result);
3266 }
3267}
3268
3269_I32x1 m::wasm::strcmp(NChar left, NChar right, bool reverse)
3270{
3271 /* Delegate to `strncmp` with length set to minimum of both string lengths **plus** 1 since we need to check if
3272 * one string is a prefix of the other, i.e. all of its characters are equal but it is shorter than the other. */
3273 U32x1 len(std::min<uint32_t>(left.length(), right.length()) + 1U);
3274 return strncmp(left, right, len, reverse);
3275}
3276
3277_Boolx1 m::wasm::strncmp(NChar left, NChar right, U32x1 len, cmp_op op, bool reverse)
3278{
3279 _I32x1 res = strncmp(left, right, len, reverse);
3280
3281 switch (op) {
3282 case EQ: return res == 0;
3283 case NE: return res != 0;
3284 case LT: return res < 0;
3285 case LE: return res <= 0;
3286 case GT: return res > 0;
3287 case GE: return res >= 0;
3288 }
3289}
3290
3291_Boolx1 m::wasm::strcmp(NChar left, NChar right, cmp_op op, bool reverse)
3292{
3293 _I32x1 res = strcmp(left, right, reverse);
3294
3295 switch (op) {
3296 case EQ: return res == 0;
3297 case NE: return res != 0;
3298 case LT: return res < 0;
3299 case LE: return res <= 0;
3300 case GT: return res > 0;
3301 case GE: return res >= 0;
3302 }
3303}
3304
3305
3306/*======================================================================================================================
3307 * string copy
3308 *====================================================================================================================*/
3309
{
    /* Copies at most `count` characters from `src` to `dst`, stopping after copying a NUL byte.  Unlike the
     * C standard `strncpy`, the destination is *not* padded with additional NUL bytes, and the returned pointer
     * is a pointer into `dst`: it points at the copied NUL byte if one was found, or one past the last copied
     * character otherwise. */
    static thread_local struct {} _; // unique caller handle
    struct data_t : GarbageCollectedData
    {
        public:
        /* Compiled helper function; created lazily on first use and cached for the module's lifetime. */
        std::optional<FunctionProxy<char*(char*, char*, uint32_t)>> strncpy;

        data_t(GarbageCollectedData &&d) : GarbageCollectedData(std::move(d)) { }
    };
    auto &d = Module::Get().add_garbage_collected_data<data_t>(&_); // garbage collect the `data_t` instance

    if (not d.strncpy) {
        /*----- Create function to compute the result. -----*/
        FUNCTION(strncpy, char*(char*, char*, uint32_t))
        {
            auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function

            auto dst = PARAMETER(0);
            auto src = PARAMETER(1);
            const auto count = PARAMETER(2);

            Wasm_insist(not src.is_nullptr(), "source must not be nullptr");
            Wasm_insist(not dst.is_nullptr(), "destination must not be nullptr");

            /* Copy character-wise until `count` characters were copied or a NUL byte was copied. */
            Var<Ptr<Charx1>> src_end(src + count.make_signed());
            WHILE (src != src_end) {
                *dst = *src;
                BREAK(*src == '\0'); // break on terminating NUL byte; note: NUL is copied, `dst` stays on it
                src += 1;
                dst += 1;
            }

            RETURN(dst);
        }
        d.strncpy = std::move(strncpy);
    }

    /*----- Call strncpy function. ------*/
    M_insist(bool(d.strncpy));
    const Var<Ptr<Charx1>> result((*d.strncpy)(dst, src, count)); // to prevent duplicated computation due to `clone()`
    return result;
}
3353
3354
3355/*======================================================================================================================
3356 * WasmLike
3357 *====================================================================================================================*/
3358
/** Evaluates the SQL `LIKE` predicate `_str LIKE _pattern ESCAPE escape_char` via a dynamic-programming table,
 * compiled to a reusable Wasm function.  `_` matches a single character, `%` matches any (possibly empty)
 * sequence of characters, and `escape_char` escapes `_`, `%`, and itself.  Returns NULL iff either operand is
 * NULL.  Raises `exception::invalid_escape_sequence` at runtime for an invalid escape sequence. */
_Boolx1 m::wasm::like(NChar _str, NChar _pattern, const char escape_char)
{
    static thread_local struct {} _; // unique caller handle
    struct data_t : GarbageCollectedData
    {
        public:
        /* Compiled helper function; created lazily on first use and cached for the module's lifetime. */
        std::optional<FunctionProxy<bool(int32_t, int32_t, char*, char*, char)>> like;

        data_t(GarbageCollectedData &&d) : GarbageCollectedData(std::move(d)) { }
    };
    auto &d = Module::Get().add_garbage_collected_data<data_t>(&_); // garbage collect the `data_t` instance

    M_insist('_' != escape_char and '%' != escape_char, "illegal escape character");

    /* Trivial case: the empty pattern matches the empty string. */
    if (_str.length() == 0 and _pattern.length() == 0) {
        _str.discard();
        _pattern.discard();
        return _Boolx1(true);
    }

    auto like_non_null = [&d, &_str, &_pattern, &escape_char](Ptr<Charx1> str, Ptr<Charx1> pattern) -> Boolx1 {
        Wasm_insist(str.clone().not_null(), "string operand must not be NULL");
        Wasm_insist(pattern.clone().not_null(), "pattern operand must not be NULL");

        if (not d.like) {
            /*----- Create function to compute the result. -----*/
            FUNCTION(like, bool(int32_t, int32_t, char*, char*, char))
            {
                auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function

                const auto len_ty_str = PARAMETER(0);
                const auto len_ty_pattern = PARAMETER(1);
                const auto val_str = PARAMETER(2);
                const auto val_pattern = PARAMETER(3);
                const auto escape_char = PARAMETER(4);

                /*----- Allocate memory for the dynamic programming table. -----*/
                /* Invariant: dp[i][j] == true iff val_pattern[:i] contains val_str[:j]. Row i and column j is located
                 * at dp + (i - 1) * (`length_str` + 1) + (j - 1). */
                I32x1 num_entries = (len_ty_str + 1) * (len_ty_pattern + 1);
                const Var<Ptr<Boolx1>> dp = Module::Allocator().malloc<bool>(num_entries.clone().make_unsigned());

                /*----- Initialize table with all entries set to false. -----*/
                Var<Ptr<Boolx1>> entry(dp.val());
                WHILE (entry < dp + num_entries.clone()) {
                    *entry = false;
                    entry += 1;
                }

                /*----- Reset entry pointer to first entry. -----*/
                entry = dp.val();

                /*----- Create pointers to track locations of current characters of `val_str` and `val_pattern`. -----*/
                Var<Ptr<Charx1>> str(val_str);
                Var<Ptr<Charx1>> pattern(val_pattern);

                /*----- Compute ends of str and pattern. -----*/
                /* Create constant local variables to ensure correct pointers since `src` and `pattern` will change. */
                const Var<Ptr<Charx1>> end_str(str + len_ty_str);
                const Var<Ptr<Charx1>> end_pattern(pattern + len_ty_pattern);

                /*----- Create variables for the current byte of str and pattern. -----*/
                Var<Charx1> byte_str, byte_pattern; // always loaded before first access

                /*----- Initialize first column. -----*/
                /* Iterate until current byte of pattern is not a `%`-wildcard and set the respective entries to true.
                 * Note the DO_WHILE: the body executes once before the first condition check, so `byte_pattern` is
                 * loaded before it is first read, and dp[0][0] (empty pattern contains empty string) is set. */
                DO_WHILE (byte_pattern == '%') {
                    byte_pattern = Select(pattern < end_pattern, *pattern, '\0');
                    *entry = true;
                    entry += len_ty_str + 1;
                    pattern += 1;
                }

                /*----- Compute entire table. -----*/
                /* Create variable for the actual length of str. */
                Var<I32x1> len_str(0);

                /* Create flag whether the current byte of pattern is not escaped. */
                Var<Boolx1> is_not_escaped(true);

                /* Reset entry pointer to second row and second column. */
                entry = dp + len_ty_str + 2;

                /* Reset pattern to first character. */
                pattern = val_pattern;

                /* Load first byte from pattern if in bounds. */
                byte_pattern = Select(pattern < end_pattern, *pattern, '\0');

                /* Create loop iterating as long as the current byte of pattern is not NUL. */
                WHILE (byte_pattern != '\0') {
                    /* If current byte of pattern is not escaped and equals `escape_char`, advance pattern to next
                     * byte and load it. Additionally, mark this byte as escaped and check for invalid escape
                     * sequences. */
                    IF (is_not_escaped and byte_pattern == escape_char) {
                        pattern += 1;
                        byte_pattern = Select(pattern < end_pattern, *pattern, '\0');

                        /* Check whether current byte of pattern is a validly escaped character, i.e. `_`, `%` or
                         * `escape_char`. If not, throw an exception. */
                        IF (byte_pattern != '_' and byte_pattern != '%' and byte_pattern != escape_char) {
                            Throw(exception::invalid_escape_sequence);
                        };

                        is_not_escaped = false;
                    };

                    /* Reset actual length of str. */
                    len_str = 0;

                    /* Load first byte from str if in bounds. */
                    byte_str = Select(str < end_str, *str, '\0');

                    /* Create loop iterating as long as the current byte of str is not NUL. */
                    WHILE (byte_str != '\0') {
                        /* Increment actual length of str. */
                        len_str += 1;

                        IF (is_not_escaped and byte_pattern == '%') {
                            /* `%` matches empty (above entry) or one more character (left entry).
                             * Store disjunction of above and left entry. */
                            *entry = *(entry - (len_ty_str + 1)) or *(entry - 1);
                        } ELSE {
                            IF ((is_not_escaped and byte_pattern == '_') or byte_pattern == byte_str) {
                                /* Single-character match extends a match of both prefixes.
                                 * Store above left entry. */
                                *entry = *(entry - (len_ty_str + 2));
                            };
                        };

                        /* Advance entry pointer to next entry, advance str to next byte, and load next byte from str
                         * if in bounds. */
                        entry += 1;
                        str += 1;
                        byte_str = Select(str < end_str, *str, '\0');
                    }

                    /* Advance entry pointer to second column in the next row, reset str to first character, advance
                     * pattern to next byte, load next byte from pattern if in bounds, and reset is_not_escaped to
                     * true. */
                    entry += len_ty_str + 1 - len_str;
                    str = val_str;
                    pattern += 1;
                    byte_pattern = Select(pattern < end_pattern, *pattern, '\0');
                    is_not_escaped = true;
                }

                /*----- Compute result. -----*/
                /* Entry pointer points currently to the second column in the first row after the pattern has ended.
                 * Therefore, we have to go one row up and len_str - 1 columns to the right, i.e. the result is
                 * located at entry - (`length_str` + 1) + len_str - 1 = entry + len_str - (`length_str` + 2). */
                const Var<Boolx1> result(*(entry + len_str - (len_ty_str + 2)));

                /*----- Free allocated space. -----*/
                Module::Allocator().free(dp, num_entries.make_unsigned());

                RETURN(result);
            }

            d.like = std::move(like);
        }

        /*----- Call like function. ------*/
        M_insist(bool(d.like));
        return (*d.like)(_str.length(), _pattern.length(), str, pattern, escape_char);
    };

    /* NULL handling: if either operand may be NULL, branch at runtime; result is NULL iff any operand is NULL. */
    if (_str.can_be_null() or _pattern.can_be_null()) {
        auto [_val_str, is_null_str] = _str.split();
        auto [_val_pattern, is_null_pattern] = _pattern.split();
        Ptr<Charx1> val_str(_val_str), val_pattern(_val_pattern); // since structured bindings cannot be used in lambda capture

        _Var<Boolx1> result; // always set here
        IF (is_null_str or is_null_pattern) {
            result = _Boolx1::Null();
        } ELSE {
            result = like_non_null(val_str, val_pattern);
        };
        return result;
    } else {
        const Var<Boolx1> result(like_non_null(_str, _pattern)); // to prevent duplicated computation due to `clone()`
        return _Boolx1(result);
    }
}
3541
{
    /* Evaluates a `LIKE '%needle%'` (contains) predicate via Knuth-Morris-Pratt string search, compiled to a
     * per-pattern Wasm function that is cached in `contains_map`.  Returns NULL iff `_str` is NULL. */
    static thread_local struct {} _; // unique caller handle
    struct data_t : GarbageCollectedData
    {
        public:
        /* One compiled search function per distinct pattern, cached for the module's lifetime. */
        std::unordered_map<ThreadSafePooledString, FunctionProxy<bool(int32_t, char*)>> contains_map;

        data_t(GarbageCollectedData &&d) : GarbageCollectedData(std::move(d)) { }
    };
    auto &d = Module::Get().add_garbage_collected_data<data_t>(&_); // garbage collect the `data_t` instance

    M_insist(std::regex_match(*_pattern, std::regex("%[^_%\\\\]+%")), "invalid contains pattern");

    /* An empty string cannot contain the (non-empty) needle. */
    if (_str.length() == 0) {
        _str.discard();
        return _Boolx1(false);
    }

    auto contains_non_null = [&d, &_str, &_pattern](Ptr<Charx1> str) -> Boolx1 {
        Wasm_insist(str.clone().not_null(), "string operand must not be NULL");

        auto it = d.contains_map.find(_pattern);
        if (it == d.contains_map.end()) {
            /*----- Create function to compute the result. -----*/
            FUNCTION(contains, bool(int32_t, char*))
            {
                auto S = CodeGenContext::Get().scoped_environment(); // create scoped environment for this function

                const auto len_ty_str = PARAMETER(0);
                auto val_str = PARAMETER(1);

                /*----- Copy pattern without enclosing `%` to make it accessible with runtime offset. -----*/
                /* The pattern is known at compile time, so needle and prefix table are materialized into the
                 * module's memory once.  NOTE(review): `raw_malloc` allocations are seemingly never freed here —
                 * presumably they live for the module's lifetime by design; confirm against the allocator. */
                const int64_t len_pattern = strlen(*_pattern) - 2; // minus 2 due to enclosing `%`
                auto pattern = Module::Allocator().raw_malloc<char>(len_pattern);
                for (std::size_t i = 0; i < len_pattern; ++i)
                    pattern[i] = (*_pattern)[i + 1]; // access _pattern with offset +1 due to starting `%`

                /*----- Precompute prefix table. -----*/
                /* Classic KMP failure function, computed at compile time; tbl[i] is the length of the longest
                 * proper prefix of pattern[:i] that is also its suffix (with tbl[0] == -1 as sentinel). */
                auto tbl = Module::Allocator().raw_malloc<int64_t>(len_pattern + 1);
                int64_t len_prefix = -1;

                tbl[0] = len_prefix;
                for (std::size_t i = 1; i < len_pattern + 1; ++i) {
                    while (len_prefix >= 0 and pattern[len_prefix] != pattern[i - 1])
                        len_prefix = tbl[len_prefix];
                    ++len_prefix;
                    tbl[i] = len_prefix;
                }

                /*----- Search pattern in string. -----*/
                const Var<Ptr<Charx1>> end_str(val_str + len_ty_str);
                Var<I64x1> pos_pattern(0);
                WHILE (val_str < end_str and *val_str != '\0') {
                    /* On mismatch, fall back along the prefix table; a negative position means restart. */
                    WHILE(*val_str != *(Ptr<Charx1>(pattern) + pos_pattern)) {
                        Wasm_insist(pos_pattern < len_pattern + 1);
                        pos_pattern = *(Ptr<I64x1>(tbl) + pos_pattern);
                        IF (pos_pattern < 0) {
                            BREAK();
                        };
                    }
                    val_str += 1;
                    pos_pattern += 1;
                    IF (pos_pattern == len_pattern) {
                        RETURN(true); // full needle matched
                    };
                }
                RETURN(false);
            }
            it = d.contains_map.emplace_hint(it, _pattern, std::move(contains));
        }

        /*----- Call contains function. ------*/
        M_insist(it != d.contains_map.end());
        return (it->second)(_str.length(), str);
    };

    /* NULL handling: branch at runtime iff the string operand may be NULL. */
    if (_str.can_be_null()) {
        auto [_val_str, is_null_str] = _str.split();
        Ptr<Charx1> val_str(_val_str); // since structured bindings cannot be used in lambda capture

        _Var<Boolx1> result; // always set here
        IF (is_null_str) {
            result = _Boolx1::Null();
        } ELSE {
            result = contains_non_null(val_str);
        };
        return result;
    } else {
        const Var<Boolx1> result(contains_non_null(_str)); // to prevent duplicated computation due to `clone()`
        return _Boolx1(result);
    }
}
3636
{
    /* Evaluates a `LIKE 'prefix%'` predicate by rewriting it into a range check:
     * `str >= "prefix"  and  str < "prefiy"` (last character incremented).  Both bounds are materialized as
     * NUL-terminated strings in the module's memory at compile time. */
    M_insist(std::regex_match(*pattern, std::regex("[^_%\\\\]+%")), "invalid prefix pattern");

    /*----- Create lower bound. -----*/
    const int32_t len_pattern = strlen(*pattern) - 1; // minus 1 due to ending `%`
    auto _lower_bound = Module::Allocator().raw_malloc<char>(len_pattern + 1);
    for (std::size_t i = 0; i < len_pattern; ++i)
        _lower_bound[i] = (*pattern)[i];
    _lower_bound[len_pattern] = '\0';
    NChar lower_bound(Ptr<Charx1>(_lower_bound), false, len_pattern, true);

    /*----- Create upper bound. -----*/
    auto _upper_bound = Module::Allocator().raw_malloc<char>(len_pattern + 1);
    for (std::size_t i = 0; i < len_pattern - 1; ++i)
        _upper_bound[i] = (*pattern)[i];
    const char last_char = (*pattern)[len_pattern - 1];
    _upper_bound[len_pattern - 1] = last_char + 1; // increment last character for upper bound
    /* NOTE(review): assumes `last_char + 1` does not overflow, i.e. the prefix does not end in the maximum
     * character value — confirm this is guaranteed by the pattern's provenance. */
    _upper_bound[len_pattern] = '\0';
    NChar upper_bound(Ptr<Charx1>(_upper_bound), false, len_pattern, true);

    /*----- Compute result by checking whether given string is in created interval. -----*/
    auto str_cpy = str.clone();
    return strcmp(str_cpy, lower_bound, GE) and strcmp(str, upper_bound, LT);
}
3662
{
    /* Evaluates a `LIKE '%suffix'` predicate by rewriting it into a range check on *reversed* strings:
     * the bounds are the suffix and the suffix with its first character incremented, and the comparisons use
     * the reversed variant of `strncmp` so that the suffix effectively becomes a prefix. */
    M_insist(std::regex_match(*pattern, std::regex("%[^_%\\\\]+")), "invalid suffix pattern");

    /*----- Create lower bound. -----*/
    const int32_t len_pattern = strlen(*pattern) - 1; // minus 1 due to starting `%`
    auto _lower_bound = Module::Allocator().raw_malloc<char>(len_pattern + 1);
    for (std::size_t i = 0; i < len_pattern; ++i)
        _lower_bound[i] = (*pattern)[i + 1]; // access pattern with offset +1 due to starting `%`
    _lower_bound[len_pattern] = '\0';
    NChar lower_bound(Ptr<Charx1>(_lower_bound), false, len_pattern, true);

    /*----- Create upper bound. -----*/
    auto _upper_bound = Module::Allocator().raw_malloc<char>(len_pattern + 1);
    const char first_char = (*pattern)[1]; // access first character at offset 1 due to starting `%`
    _upper_bound[0] = first_char + 1; // increment first character for upper bound
    /* NOTE(review): assumes `first_char + 1` does not overflow — confirm the pattern cannot start with the
     * maximum character value. */
    for (std::size_t i = 1; i < len_pattern; ++i)
        _upper_bound[i] = (*pattern)[i + 1]; // access pattern with offset +1 due to starting `%`
    _upper_bound[len_pattern] = '\0';
    NChar upper_bound(Ptr<Charx1>(_upper_bound), false, len_pattern, true);

    /*----- Compute result by checking whether given string is in created interval when reversed. -----*/
    const auto max_length = std::max<uint32_t>(str.length(), len_pattern); // use maximal length due to reversed strncmp
    auto str_cpy = str.clone();
    return strncmp(str_cpy, lower_bound, U32x1(max_length), GE, true) and
           strncmp(str, upper_bound, U32x1(max_length), LT, true);
}
3690
3691
3692/*======================================================================================================================
3693 * comparator
3694 *====================================================================================================================*/
3695
/** Emits code that lexicographically compares the tuples bound in `env_left` and `env_right` according to
 * `order` (a list of (expression, ascending?) pairs) and returns a signed result: negative if left orders
 * before right, positive if after, zero if equal.
 *
 * Two code-generation strategies are selected at compile time:
 *  - `Predicated == true`: branch-free — every ordering expression is evaluated and folded into a single
 *    integer by shifting previous results left so that the first differing expression dominates.
 *  - `Predicated == false`: branching — comparisons are emitted in order and the first difference jumps out
 *    of the `compare` block via `GOTO`.
 * NULL ordering: a NULL value orders *before* any non-NULL value (NULLS FIRST for ascending order). */
template<bool Predicated>
I32x1 m::wasm::compare(const Environment &env_left, const Environment &env_right,
                       const std::vector<SortingOperator::order_type> &order)
{
    if constexpr (Predicated) {
        Var<I32x1> result(0); // explicitly (re-)set result to 0

        /*----- Compile ordering. -----*/
        for (auto &o : order) {
            /*----- Compile order expression for left tuple. -----*/
            SQL_t _val_left = env_left.compile(o.first);

            std::visit(overloaded {
                [&]<typename T>(Expr<T> val_left) -> void {
                    /*----- Compile order expression for right tuple. -----*/
                    Expr<T> val_right = env_right.template compile<Expr<T>>(o.first);

                    M_insist(val_left.can_be_null() == val_right.can_be_null(),
                             "either both or none of the value to compare must be nullable");
                    if (val_left.can_be_null()) {
                        /* Booleans are widened to i32 since they lack relational operators. */
                        using type = std::conditional_t<std::is_same_v<T, bool>, _I32x1, Expr<T>>;
                        Var<type> left, right;
                        if constexpr (std::is_same_v<T, bool>) {
                            left = val_left.template to<int32_t>();
                            right = val_right.template to<int32_t>();
                        } else {
                            left = val_left;
                            right = val_right;
                        }

                        /*----- Compare both with current order expression and update result. -----*/
                        /* cmp_null in {-1,0,1}: ranks NULLs before non-NULLs regardless of the value comparison. */
                        I32x1 cmp_null = right.is_null().template to<int32_t>() - left.is_null().template to<int32_t>();
                        _I32x1 _val_lt = (left < right).template to<int32_t>();
                        _I32x1 _val_gt = (left > right).template to<int32_t>();
                        _I32x1 _cmp_val = o.second ? _val_gt - _val_lt : _val_lt - _val_gt;
                        auto [cmp_val, cmp_is_null] = _cmp_val.split();
                        cmp_is_null.discard();
                        I32x1 cmp = (cmp_null << 1) + cmp_val; // potentially-null value of comparison is overruled by cmp_null
                        result <<= 2; // shift result s.t. first difference will determine order
                        result += cmp; // add current comparison to result
                    } else {
                        using type = std::conditional_t<std::is_same_v<T, bool>, I32x1, PrimitiveExpr<T>>;
                        Var<type> left, right;
                        if constexpr (std::is_same_v<T, bool>) {
                            left = val_left.insist_not_null().template to<int32_t>();
                            right = val_right.insist_not_null().template to<int32_t>();
                        } else {
                            left = val_left.insist_not_null();
                            right = val_right.insist_not_null();
                        }

                        /*----- Compare both with current order expression and update result. -----*/
                        I32x1 val_lt = (left < right).template to<int32_t>();
                        I32x1 val_gt = (left > right).template to<int32_t>();
                        I32x1 cmp = o.second ? val_gt - val_lt : val_lt - val_gt;
                        result <<= 1; // shift result s.t. first difference will determine order
                        result += cmp; // add current comparison to result
                    }
                },
                [&](NChar val_left) -> void {
                    /* Checked cast: asserts the order expression is of character sequence type. */
                    auto &cs = as<const CharacterSequence>(*o.first.get().type());

                    /*----- Compile order expression for right tuple. -----*/
                    NChar val_right = env_right.template compile<NChar>(o.first);

                    /* Materialize pointers in local variables so `strcmp` can clone them safely. */
                    Var<Ptr<Charx1>> _left(val_left.val()), _right(val_right.val());
                    NChar left(_left, val_left.can_be_null(), val_left.length(), val_left.guarantees_terminating_nul()),
                          right(_right, val_right.can_be_null(), val_right.length(), val_right.guarantees_terminating_nul());

                    M_insist(val_left.can_be_null() == val_right.can_be_null(),
                             "either both or none of the value to compare must be nullable");
                    if (val_left.can_be_null()) {
                        /*----- Compare both with current order expression and update result. -----*/
                        I32x1 cmp_null = _right.is_null().to<int32_t>() - _left.is_null().to<int32_t>();
                        _I32x1 _delta = o.second ? strcmp(left, right) : strcmp(right, left);
                        auto [delta_val, delta_is_null] = _delta.split();
                        Wasm_insist(delta_val.clone() >= -1 and delta_val.clone() <= 1,
                                    "result of strcmp is assumed to be in [-1,1]");
                        delta_is_null.discard();
                        I32x1 cmp = (cmp_null << 1) + delta_val; // potentially-null value of comparison is overruled by cmp_null
                        result <<= 2; // shift result s.t. first difference will determine order
                        result += cmp; // add current comparison to result
                    } else {
                        /*----- Compare both with current order expression and update result. -----*/
                        I32x1 delta = o.second ? strcmp(left, right).insist_not_null()
                                               : strcmp(right, left).insist_not_null();
                        Wasm_insist(delta.clone() >= -1 and delta.clone() <= 1,
                                    "result of strcmp is assumed to be in [-1,1]");
                        result <<= 1; // shift result s.t. first difference will determine order
                        result += delta; // add current comparison to result
                    }
                },
                [](auto&&) -> void { M_unreachable("SIMDfication currently not supported"); },
                [](std::monostate) -> void { M_unreachable("invalid expression"); }
            }, _val_left);
        }

        return result;
    } else {
        Var<I32x1> result; // always set here

        /*----- Compile ordering. -----*/
        BLOCK(compare) {
            /* Emit one comparison per ordering expression; the first difference sets `result` and jumps to the
             * end of the `compare` block.  Recursion (instead of a loop) nests the emitted code per expression. */
            auto emit_comparison_rec = [&](decltype(order.cbegin()) curr, const decltype(order.cend()) end,
                                           auto &rec) -> void
            {
                /*----- If end of ordering is reached, left and right tuple are equal. -----*/
                if (curr == end) {
                    result = 0;
                    return;
                }

                /*----- Compile order expression for left tuple. -----*/
                SQL_t _val_left = env_left.compile(curr->first);

                std::visit(overloaded {
                    [&]<typename T>(Expr<T> val_left) -> void {
                        /*----- Compile order expression for right tuple. -----*/
                        Expr<T> val_right = env_right.template compile<Expr<T>>(curr->first);

                        M_insist(val_left.can_be_null() == val_right.can_be_null(),
                                 "either both or none of the value to compare must be nullable");
                        if (val_left.can_be_null()) {
                            /* Booleans are widened to i32 since they lack relational operators. */
                            using type = std::conditional_t<std::is_same_v<T, bool>, _I32x1, Expr<T>>;
                            Var<type> _left, _right;
                            if constexpr (std::is_same_v<T, bool>) {
                                _left = val_left.template to<int32_t>();
                                _right = val_right.template to<int32_t>();
                            } else {
                                _left = val_left;
                                _right = val_right;
                            }

                            /*----- Compare both with current order expression and potentially set result. -----*/
                            /* NULL ordering: non-NULL > NULL, i.e. NULLs order first. */
                            IF (_left.not_null()) {
                                IF (_right.is_null()) {
                                    result = 1;
                                    GOTO(compare);
                                };
                                auto left = _left.val().insist_not_null(),
                                     right = _right.val().insist_not_null();
                                Boolx1 left_lt_right = curr->second ? left.clone() < right.clone()
                                                                    : left.clone() > right.clone();
                                IF (left_lt_right) {
                                    result = -1;
                                    GOTO(compare);
                                };
                                Boolx1 left_gt_right = curr->second ? left > right : left < right;
                                IF (left_gt_right) {
                                    result = 1;
                                    GOTO(compare);
                                };
                            } ELSE {
                                IF (_right.not_null()) {
                                    result = -1;
                                    GOTO(compare);
                                };
                            };
                        } else {
                            using type = std::conditional_t<std::is_same_v<T, bool>, I32x1, PrimitiveExpr<T>>;
                            Var<type> left, right;
                            if constexpr (std::is_same_v<T, bool>) {
                                left = val_left.insist_not_null().template to<int32_t>();
                                right = val_right.insist_not_null().template to<int32_t>();
                            } else {
                                left = val_left.insist_not_null();
                                right = val_right.insist_not_null();
                            }

                            /*----- Compare both with current order expression and potentially set result. -----*/
                            Boolx1 left_lt_right = curr->second ? left < right : left > right;
                            IF (left_lt_right) {
                                result = -1;
                                GOTO(compare);
                            };
                            Boolx1 left_gt_right = curr->second ? left > right : left < right;
                            IF (left_gt_right) {
                                result = 1;
                                GOTO(compare);
                            };
                        }
                    },
                    [&](NChar val_left) -> void {
                        /* Checked cast: asserts the order expression is of character sequence type. */
                        auto &cs = as<const CharacterSequence>(*curr->first.get().type());

                        /*----- Compile order expression for right tuple. -----*/
                        NChar val_right = env_right.template compile<NChar>(curr->first);

                        /* Nullability is handled by the explicit IF branches below, hence `false` here. */
                        Var<Ptr<Charx1>> _left(val_left.val()), _right(val_right.val());
                        NChar left(_left, false, val_left.length(), val_left.guarantees_terminating_nul()),
                              right(_right, false, val_right.length(), val_right.guarantees_terminating_nul());

                        M_insist(val_left.can_be_null() == val_right.can_be_null(),
                                 "either both or none of the value to compare must be nullable");
                        if (val_left.can_be_null()) {
                            /*----- Compare both with current order expression and potentially set result. -----*/
                            IF (_left.not_null()) {
                                IF (_right.is_null()) {
                                    result = 1;
                                    GOTO(compare);
                                };
                                I32x1 cmp = curr->second ? strcmp(left, right).insist_not_null()
                                                         : strcmp(right, left).insist_not_null();
                                IF (cmp.clone() != 0) {
                                    result = cmp;
                                    GOTO(compare);
                                };
                            } ELSE {
                                IF (_right.not_null()) {
                                    result = -1;
                                    GOTO(compare);
                                };
                            };
                        } else {
                            /*----- Compare both with current order expression and potentially set result. -----*/
                            I32x1 cmp = curr->second ? strcmp(left, right).insist_not_null()
                                                     : strcmp(right, left).insist_not_null();
                            IF (cmp.clone() != 0) {
                                result = cmp;
                                GOTO(compare);
                            };
                        }
                    },
                    [](auto&&) -> void { M_unreachable("SIMDfication currently not supported"); },
                    [](std::monostate) -> void { M_unreachable("invalid expression"); }
                }, _val_left);

                /*----- Recurse to next comparison. -----*/
                rec(std::next(curr), end, rec);
            };
            emit_comparison_rec(order.cbegin(), order.cend(), emit_comparison_rec);
        }

        /* GOTOs from above jump here */

        return result;
    }
}
3935
/* Explicit instantiations of `compare` for both the branching (`false`) and the predicated (`true`) variant,
 * to prevent linker errors since the template is defined in this translation unit only. */
template I32x1 m::wasm::compare<false>(
    const Environment&, const Environment&, const std::vector<SortingOperator::order_type>&
);
template I32x1 m::wasm::compare<true>(
    const Environment&, const Environment&, const std::vector<SortingOperator::order_type>&
);
__attribute__((constructor(202))) static void register_interpreter()
#define id(X)
#define M_insist_no_ternary_logic()
Definition: WasmDSL.hpp:45
#define Wasm_insist(...)
Definition: WasmDSL.hpp:373
#define Throw(...)
Definition: WasmMacro.hpp:48
#define ELSE
Definition: WasmMacro.hpp:24
#define LOOP(...)
Definition: WasmMacro.hpp:30
#define WHILE(...)
Definition: WasmMacro.hpp:43
#define BLOCK(...)
Definition: WasmMacro.hpp:15
#define PARAMETER(IDX)
Definition: WasmMacro.hpp:20
#define IF(COND)
Definition: WasmMacro.hpp:23
#define FUNCTION(NAME, TYPE)
Definition: WasmMacro.hpp:17
#define DO_WHILE(...)
Definition: WasmMacro.hpp:37
#define BLOCK_OPEN(BLK)
Definition: WasmMacro.hpp:8
#define RETURN(RES)
Definition: WasmMacro.hpp:21
void convert_in_place(SQL_t &operand)
Convert operand of some SQL_t type to the target type.
Definition: WasmUtil.cpp:61
#define BINOP(OP)
#define CMPOP(OP, STRCMP_OP)
std::conditional_t< CanBeNull, _Bool< L >, Bool< L > > compile_cnf(ExprCompiler &C, const cnf::CNF &cnf)
Definition: WasmUtil.cpp:112
#define UNOP(OP)
#define CALL(TYPE)
void add(const char *group_name, const char *short_name, const char *long_name, const char *description, Callback &&callback)
Adds a new group option to the ArgParser.
Definition: ArgParser.hpp:84
#define M_unreachable(MSG)
Definition: macro.hpp:146
#define M_CONSTEXPR_COND(COND, IF_TRUE, IF_FALSE)
Definition: macro.hpp:54
#define M_insist(...)
Definition: macro.hpp:129
const Schema const Schema & tuple_schema
Definition: DataLayout.hpp:255
std::size_t get_num_simd_lanes(const DataLayout &layout, const Schema &layout_schema, const Schema &tuple_schema)
Returns the number of SIMD lanes used for accessing tuples of schema tuple_schema in SIMDfied manner ...
Definition: DataLayout.cpp:244
const Schema & layout_schema
Definition: DataLayout.hpp:255
_I32x1 strcmp(NChar left, NChar right, bool reverse=false)
Compares two strings left and right.
Definition: WasmUtil.cpp:3269
_Boolx1 like_prefix(NChar str, const ThreadSafePooledString &pattern)
Checks whether the string str has the prefix pattern.
Definition: WasmUtil.cpp:3637
_Boolx1 like_contains(NChar str, const ThreadSafePooledString &pattern)
Checks whether the string str contains the pattern pattern.
Definition: WasmUtil.cpp:3542
_I32x1 strncmp(NChar left, NChar right, U32x1 len, bool reverse=false)
Compares two strings left and right.
Definition: WasmUtil.cpp:3101
std::variant< std::monostate #define ADD_TYPE(TYPE) SQL_TYPES(ADD_TYPE) > SQL_t
Definition: WasmUtil.hpp:397
Ptr< Charx1 > strncpy(Ptr< Charx1 > dst, Ptr< Charx1 > src, U32x1 count)
Copies the contents of src to dst, but no more than count characters.
Definition: WasmUtil.cpp:3310
template I32x1 compare< true >(const Environment &, const Environment &, const std::vector< SortingOperator::order_type > &)
typename detail::_var_helper< T >::type _Var
Local variable that can always be NULL.
Definition: WasmDSL.hpp:5785
Bool< L > T mask
Definition: WasmUtil.hpp:1325
std::tuple< Block, Block, Block > compile_store_sequential(const Schema &tuple_value_schema, const Schema &tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, std::size_t num_simd_lanes, const Schema &layout_schema, Variable< uint64_t, Kind, false > &tuple_id)
Compiles the data layout layout containing tuples of schema layout_schema such that it sequentially s...
typename detail::var_helper< T >::type Var
Local variable.
Definition: WasmDSL.hpp:5780
void compile_store_point_access(const Schema &tuple_value_schema, const Schema &tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, const Schema &layout_schema, U64x1 tuple_id)
Compiles the data layout layout starting at memory address base_address and containing tuples of sche...
Definition: WasmUtil.cpp:2471
std::variant< std::monostate #define ADD_TYPE(TYPE) SQL_ADDR_TYPES(ADD_TYPE) > SQL_addr_t
Definition: WasmUtil.hpp:404
PrimitiveExpr< bool, L > eqz() and(L
auto make_signed()
Conversion of a PrimitiveExpr<T, L> to a PrimitiveExpr<std::make_signed_t<T>, L>.
Definition: WasmDSL.hpp:3651
void GOTO(const Block &block)
Jumps to the end of block.
Definition: WasmDSL.hpp:6200
template I32x1 compare< false >(const Environment &, const Environment &, const std::vector< SortingOperator::order_type > &)
and
Constructs a new PrimitiveExpr from a constant value.
Definition: WasmDSL.hpp:1520
std::size_t L
Definition: WasmDSL.hpp:528
void compile_load_point_access(const Schema &tuple_value_schema, const Schema &tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, const Schema &layout_schema, U64x1 tuple_id)
Compiles the data layout layout starting at memory address base_address and containing tuples of sche...
Definition: WasmUtil.cpp:2479
typename detail::global_helper< T >::type Global
Global variable.
Definition: WasmDSL.hpp:5790
Bool< L > value
Definition: WasmUtil.hpp:1317
Bool< L > is_null(SQL_t &variant)
Definition: WasmUtil.hpp:461
std::tuple< Block, Block, Block > compile_store_sequential_single_pass(const Schema &tuple_value_schema, const Schema &tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, std::size_t num_simd_lanes, const Schema &layout_schema, Variable< uint64_t, Kind, false > &tuple_id)
Compiles the data layout layout containing tuples of schema layout_schema such that it sequentially s...
auto Select(C &&_cond, T &&_tru, U &&_fals)
Definition: WasmDSL.hpp:6216
void compile_data_layout_point_access(const Schema &_tuple_value_schema, const Schema &_tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, const Schema &layout_schema, U64x1 tuple_id)
Compiles the data layout layout starting at memory address base_address and containing tuples of sche...
Definition: WasmUtil.cpp:1945
_Boolx1 like(NChar str, NChar pattern, const char escape_char='\\')
Checks whether the string str matches the pattern pattern regarding SQL LIKE semantics using escape c...
Definition: WasmUtil.cpp:3359
Bool< L > uint8_t n
Definition: WasmUtil.hpp:1318
void discard()
Discards this.
Definition: WasmDSL.hpp:1589
std::tuple< Block, Block, Block > compile_load_sequential(const Schema &tuple_value_schema, const Schema &tuple_addr_schema, Ptr< void > base_address, const storage::DataLayout &layout, std::size_t num_simd_lanes, const Schema &layout_schema, Variable< uint64_t, Kind, false > &tuple_id)
Compiles the data layout layout containing tuples of schema layout_schema such that it sequentially l...
PrimitiveExpr< uint64_t, L > L L L L U
Definition: WasmDSL.hpp:2353
_Boolx1 like_suffix(NChar str, const ThreadSafePooledString &pattern)
Checks whether the string str has the suffix pattern.
Definition: WasmUtil.cpp:3663
for(std::size_t idx=1;idx< num_vectors;++idx) res.emplace((vectors_[idx].bitmask()<< uint32_t(idx *vector_type return * res
Definition: WasmDSL.hpp:3697
auto op
Definition: WasmDSL.hpp:2385
std::variant< std::monostate, _Boolx1, _Boolx16, _Boolx32 > SQL_boolean_t
Definition: WasmUtil.hpp:409
std::size_t bool
Definition: WasmDSL.hpp:528
I32x1 compare(const Environment &env_left, const Environment &env_right, const std::vector< SortingOperator::order_type > &order)
Compares two tuples, which must be already loaded into the environments env_left and env_right,...
Definition: WasmUtil.cpp:3697
void CONTINUE(std::size_t level=1)
Definition: WasmDSL.hpp:6188
std::pair<::wasm::Expression *, std::list< std::shared_ptr< Bit > > > move()
Moves the underlying Binaryen ::wasm::Expression and the referenced bits out of this.
Definition: WasmDSL.hpp:1568
typename uint< W >::type uint_t
Definition: WasmDSL.hpp:340
PrimitiveExpr clone() const
Creates and returns a deep copy of this.
Definition: WasmDSL.hpp:1578
cmp_op
‍comparison operations, e.g. for string comparison
Definition: WasmUtil.hpp:1336
void BREAK(std::size_t level=1)
Definition: WasmDSL.hpp:6177
static constexpr std::size_t num_simd_lanes
‍the number of SIMD lanes of the represented expression, i.e. 1 for scalar and at least 2 for vectori...
Definition: WasmDSL.hpp:1467
‍mutable namespace
Definition: Backend.hpp:10
M_EXPORT constexpr bool is_pow_2(T n)
Definition: fn.hpp:129
std::function< void(void)> pipeline_t
bool M_EXPORT contains(const H &haystack, const N &needle)
Checks whether haystack contains needle.
Definition: fn.hpp:383
void swap(PlanTableBase< Actual > &first, PlanTableBase< Actual > &second)
Definition: PlanTable.hpp:394
T(x)
ThreadSafeStringPool::proxy_type ThreadSafePooledString
Definition: Pool.hpp:464
and
Definition: enum_ops.hpp:12
bool M_EXPORT like(const std::string &str, const std::string &pattern, const char escape_char='\\')
Compares a SQL-style LIKE pattern with the given std::string.
Definition: fn.cpp:70
and arithmetic< U > and same_signedness< T, U > U
Definition: concepts.hpp:90
std::string interpret(const std::string &str, char esc='\\', char quote='"')
Definition: fn.hpp:319
void M_EXPORT setbit(T *bytes, bool value, uint32_t n)
Definition: fn.hpp:442
auto visit(Callable &&callable, Base &obj, m::tag< Callable > &&=m::tag< Callable >())
Generic implementation to visit a class hierarchy, with similar syntax as std::visit.
Definition: Visitor.hpp:138
‍command-line options for the HeuristicSearchPlanEnumerator
Definition: V8Engine.cpp:44
STL namespace.
The boolean type.
Definition: Type.hpp:230
The catalog contains all Databases and keeps track of all meta information of the database system.
Definition: Catalog.hpp:215
ThreadSafePooledString pool(const char *str) const
Creates an internalized copy of the string str by adding it to the internal StringPool.
Definition: Catalog.hpp:274
static Catalog & Get()
Return a reference to the single Catalog instance.
m::ArgParser & arg_parser()
Definition: Catalog.hpp:253
The type of character strings, both fixed length and varying length.
Definition: Type.hpp:290
The date type.
Definition: Type.hpp:364
The date type.
Definition: Type.hpp:335
fnid_t fnid
the function id
Definition: Schema.hpp:841
A Type that represents the absence of any other type.
Definition: Type.hpp:204
The numeric type represents integer and floating-point types of different precision and scale.
Definition: Type.hpp:393
uint64_t size() const override
Compute the size in bits of an instance of this type.
Definition: Type.hpp:433
Pooled< T, Pool, false > assert_not_none() const
Definition: Pool.hpp:239
An Identifier is composed of a name and an optional prefix.
Definition: Schema.hpp:42
A Schema represents a sequence of identifiers, optionally with a prefix, and their associated types.
Definition: Schema.hpp:39
std::size_t num_entries() const
Returns the number of entries in this Schema.
Definition: Schema.hpp:124
const_iterator cend() const
Definition: Schema.hpp:121
Schema deduplicate() const
Returns a deduplicated version of this Schema, i.e.
Definition: Schema.hpp:190
iterator find(const Identifier &id)
Returns an iterator to the entry with the given Identifier id, or end() if no such entry exists.
Definition: Schema.hpp:129
Schema drop_constants() const
Returns a copy of this Schema where all constant entries are removed.
Definition: Schema.hpp:200
bool is_none() const
Definition: Type.hpp:72
A binary expression.
Definition: AST.hpp:348
std::unique_ptr< Expr > lhs
Definition: AST.hpp:349
const Numeric * common_operand_type
Definition: AST.hpp:351
std::unique_ptr< Expr > rhs
Definition: AST.hpp:350
Token op() const
Definition: AST.hpp:377
A constant: a string literal or a numeric constant.
Definition: AST.hpp:213
A designator.
Definition: AST.hpp:134
Token table_name
Definition: AST.hpp:138
Token attr_name
Definition: AST.hpp:139
The error expression.
Definition: AST.hpp:116
const Type * type() const
Returns the Type of this Expr.
Definition: AST.hpp:58
A function application.
Definition: AST.hpp:246
std::vector< std::unique_ptr< Expr > > args
Definition: AST.hpp:250
const Function & get_function() const
Definition: AST.hpp:312
A query expression for nested queries.
Definition: AST.hpp:389
const ThreadSafePooledString & alias() const
Definition: AST.hpp:421
TokenType type
Definition: Token.hpp:17
ThreadSafePooledOptionalString text
declared as optional for dummy tokens
Definition: Token.hpp:16
A unary expression: "+e", "-e", "~e", "NOT e".
Definition: AST.hpp:324
Token op() const
Definition: AST.hpp:336
std::unique_ptr< Expr > expr
Definition: AST.hpp:325
A CNF represents a conjunction of cnf::Clauses.
Definition: CNF.hpp:134
bool can_be_null() const
Returns true iff this CNF formula is nullable, i.e.
Definition: CNF.hpp:155
This is an interface for factories that compute particular DataLayouts for a given sequence of Types,...
virtual size_type num_tuples() const =0
‍returns the number of tuples represented by an instance of this node
Models how data is laid out in a linear address space.
Definition: DataLayout.hpp:29
bool is_finite() const
‍returns true iff this DataLayout lays out a finite sequence of tuples
Definition: DataLayout.hpp:200
uint64_t stride_in_bits() const
‍return the stride (in bits) of the single child of the DataLayout
Definition: DataLayout.hpp:207
const Node & child() const
‍returns a reference to the single child of this DataLayout
Definition: DataLayout.hpp:209
size_type num_tuples() const
‍returns the number of tuples laid out by this DataLayout; must not be called when not is_finite()
Definition: DataLayout.hpp:202
std::vector< level_info_t > level_info_stack_t
Definition: DataLayout.hpp:53
void for_sibling_leaves(callback_leaves_t callback) const
Definition: DataLayout.cpp:161
Represents a code block, i.e.
Definition: WasmDSL.hpp:1006
void attach_to_current()
Attaches this Block to the wasm::Block currently active in the Module.
Definition: WasmDSL.hpp:1085
bool empty() const
Returns whether this Block is empty, i.e.
Definition: WasmDSL.hpp:1076
Buffers tuples by materializing them into memory.
Definition: WasmUtil.hpp:1070
buffer_load_proxy_t< IsGlobal > create_load_proxy(param_t tuple_value_schema=param_t(), param_t tuple_addr_schema=param_t()) const
Creates and returns a proxy object to load value tuples of schema tuple_value_schema (default: entire...
Definition: WasmUtil.cpp:2534
void execute_pipeline_inline(setup_t setup, pipeline_t pipeline, teardown_t teardown, param_t tuple_value_schema=param_t(), param_t tuple_addr_schema=param_t()) const
Emits code inline to execute the given pipeline pipeline for each value tuple of schema tuple_value_s...
Definition: WasmUtil.cpp:2832
void resume_pipeline(param_t tuple_value_schema=param_t(), param_t tuple_addr_schema=param_t()) const
Emits code into a separate function to resume the pipeline for each value tuple of schema tuple_value...
Definition: WasmUtil.cpp:2678
Buffer(const Schema &schema, const storage::DataLayoutFactory &factory, bool load_simdfied=false, std::size_t num_tuples=0, setup_t setup=setup_t::Make_Without_Parent(), pipeline_t pipeline=pipeline_t(), teardown_t teardown=teardown_t::Make_Without_Parent())
Creates a buffer for num_tuples tuples (0 means infinite) of schema schema using the data layout crea...
Definition: WasmUtil.cpp:2493
void execute_pipeline(setup_t setup, pipeline_t pipeline, teardown_t teardown, param_t tuple_value_schema=param_t(), param_t tuple_addr_schema=param_t()) const
Emits code into a separate function to execute the give pipeline pipeline for each value tuple of sch...
Definition: WasmUtil.cpp:2760
const Schema & schema() const
Returns the schema of the buffer.
Definition: WasmUtil.hpp:1106
void consume()
Emits code to store the current tuple into the buffer.
Definition: WasmUtil.cpp:2909
buffer_swap_proxy_t< IsGlobal > create_swap_proxy(param_t tuple_schema=param_t()) const
Creates and returns a proxy object to swap tuples of schema tuple_schema (default: entire tuples) in ...
Definition: WasmUtil.cpp:2569
std::optional< std::reference_wrapper< const Schema > > param_t
‍parameter type for proxy creation and pipeline resuming methods
Definition: WasmUtil.hpp:1073
void setup()
Performs the setup of all local variables of this buffer (by reading them from the global backups iff...
Definition: WasmUtil.cpp:2582
storage::DataLayout layout_
data layout of buffer
Definition: WasmUtil.hpp:1076
buffer_storage< IsGlobal > storage_
if IsGlobal, contains backups for base address, capacity, and size
Definition: WasmUtil.hpp:1082
buffer_store_proxy_t< IsGlobal > create_store_proxy(param_t tuple_schema=param_t()) const
Creates and returns a proxy object to store tuples of schema tuple_schema (default: entire tuples) to...
Definition: WasmUtil.cpp:2556
void resume_pipeline_inline(param_t tuple_value_schema=param_t(), param_t tuple_addr_schema=param_t()) const
Emits code inline to resume the pipeline for each value tuple of schema tuple_value_schema (default: ...
Definition: WasmUtil.cpp:2754
void teardown()
Performs the teardown of all local variables of this buffer (by storing them into the global backups ...
Definition: WasmUtil.cpp:2641
std::size_t num_simd_lanes() const
Returns the number of SIMD lanes used.
Definition: WasmUtil.hpp:939
std::size_t num_simd_lanes_preferred() const
Returns the number of SIMD lanes preferred by other operators.
Definition: WasmUtil.hpp:944
static thread_local std::unique_ptr< CodeGenContext > the_context_
Definition: WasmUtil.hpp:878
Environment & env()
Returns the current Environment.
Definition: WasmUtil.hpp:905
void set_num_simd_lanes(std::size_t n)
Sets the number of SIMD lanes used to n.
Definition: WasmUtil.hpp:941
static CodeGenContext & Get()
Definition: WasmUtil.hpp:889
Scope scoped_environment()
Creates a new, scoped Environment.
Definition: WasmUtil.hpp:897
Binds Schema::Identifiers to Expr<T>s.
Definition: WasmUtil.hpp:563
auto compile(T &&t) const
‍Compile t by delegating compilation to an ExprCompiler for this Environment.
Definition: WasmUtil.hpp:742
void dump() const
Definition: WasmUtil.cpp:539
void add(Schema::Identifier id, T &&expr)
‍Adds a mapping from id to expr.
Definition: WasmUtil.hpp:619
std::unordered_map< Schema::Identifier, SQL_t > exprs_
‍maps Schema::Identifiers to Expr<T>s that evaluate to the current expression
Definition: WasmUtil.hpp:566
SQL_t get(const Schema::Identifier &id) const
‍Returns the copied entry for identifier id.
Definition: WasmUtil.hpp:699
std::unordered_map< Schema::Identifier, SQL_addr_t > expr_addrs_
‍maps Schema::Identifiers to Ptr<Expr<T>>s that evaluate to the address of the current expression
Definition: WasmUtil.hpp:568
Compiles AST expressions m::Expr to Wasm ASTs m::wasm::Expr<T>.
Definition: WasmUtil.hpp:495
void operator()(const ast::ErrorExpr &) override
Definition: WasmUtil.cpp:153
SQL_t compile(const m::ast::Expr &e)
‍Compiles a m::Expr e of statically unknown type to a SQL_t.
Definition: WasmUtil.hpp:506
void set(SQL_t &&value)
Definition: WasmUtil.hpp:547
const Environment & env_
‍the environment to use for resolving designators to Expr<T>s
Definition: WasmUtil.hpp:500
A handle to create a Function and to create invocations of that function.
Definition: WasmDSL.hpp:1368
Helper struct for garbage collection done by the Module.
Definition: WasmDSL.hpp:604
C & add_garbage_collected_data(void *handle, Args... args)
Adds and returns an instance of.
Definition: WasmDSL.hpp:917
static Module & Get()
Definition: WasmDSL.hpp:715
friend struct Allocator
Definition: WasmDSL.hpp:653
std::size_t length() const
Definition: WasmUtil.hpp:81
bool guarantees_terminating_nul() const
Definition: WasmUtil.hpp:83
NChar clone() const
Definition: WasmUtil.hpp:53
bool can_be_null() const
Definition: WasmUtil.hpp:80
Ptr< Charx1 > val()
Definition: WasmUtil.hpp:55
Proxy to implement loads from a buffer.
Definition: WasmUtil.hpp:1207
Proxy to implement stores to a buffer.
Definition: WasmUtil.hpp:1243
Proxy to implement swaps in a buffer.
Definition: WasmUtil.hpp:1276
void operator()(U64x1 first, U64x1 second)
Swaps tuples with IDs first and second.
Definition: WasmUtil.cpp:2979
Helper type to deduce the Expr<U> type given a.
Definition: WasmDSL.hpp:160