mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
DataLayout.cpp
Go to the documentation of this file.
2
5
6
7using namespace m;
8using namespace m::storage;
9
10
11/*----------------------------------------------------------------------------------------------------------------------
12 * DataLayout::Node
13 *--------------------------------------------------------------------------------------------------------------------*/
14
16
17
18/*----------------------------------------------------------------------------------------------------------------------
19 * DataLayout::Leaf
20 *--------------------------------------------------------------------------------------------------------------------*/
21
22void DataLayout::Leaf::accept(ConstDataLayoutVisitor &v) const { v(*this); };
23
24
25/*----------------------------------------------------------------------------------------------------------------------
26 * DataLayout::INode
27 *--------------------------------------------------------------------------------------------------------------------*/
28
30 uint64_t offset_in_bits, uint64_t stride_in_bits)
31{
32 M_insist(this->num_tuples() != 1 or stride_in_bits == 0, "no stride without repetition");
33 M_insist(stride_in_bits % 8 == 0 or type->is_boolean() or type->is_bitmap(),
34 "only booleans and bitmaps may not be byte aligned");
35 M_insist(offset_in_bits % 8 == 0 or type->is_boolean() or type->is_bitmap(),
36 "only booleans and bitmaps may not be byte aligned");
37
38 auto leaf = new Leaf(type, idx);
39 children_.emplace_back(child_t{
40 .ptr = std::unique_ptr<DataLayout::Node>(as<Node>(leaf)),
41 .offset_in_bits = offset_in_bits,
42 .stride_in_bits = stride_in_bits,
43 });
44 return *leaf;
45}
46
48{
49 M_insist(num_tuples != 0, "the new INode must be large enough for at least one tuple");
50 M_insist(this->num_tuples() != 1 or stride_in_bits == 0, "no stride without repetition");
51 M_insist(this->num_tuples() % num_tuples == 0,
52 "the number of tuples in the parent must be a whole multiple of the number of tuples of the newly created "
53 "INode");
54 M_insist(offset_in_bits % 8 == 0, "the offset of the newly created INode must be byte aligned");
55 M_insist(stride_in_bits % 8 == 0, "the stride of the newly created INode must be byte aligned");
56
57 auto inode = new INode(num_tuples);
58 children_.emplace_back(child_t{
59 .ptr = std::unique_ptr<DataLayout::Node>(as<Node>(inode)),
60 .offset_in_bits = offset_in_bits,
61 .stride_in_bits = stride_in_bits,
62 });
63 return *inode;
64}
65
66void DataLayout::INode::accept(ConstDataLayoutVisitor &v) const { v(*this); }
67
68void DataLayout::INode::for_sibling_leaves(level_info_stack_t &level_info_stack, uint64_t inode_offset_in_bits,
69 const callback_leaves_t &callback) const
70{
71 std::vector<leaf_info_t> leaves;
72 leaves.reserve(this->num_children());
73
74 for (auto &child : *this) {
75 if (auto child_leaf = cast<const Leaf>(child.ptr.get())) {
76 leaves.emplace_back(leaf_info_t{
77 .leaf = *child_leaf,
78 .offset_in_bits = child.offset_in_bits,
79 .stride_in_bits = child.stride_in_bits,
80 });
81 } else {
82 auto child_inode = as<const INode>(child.ptr.get());
83 level_info_stack.emplace_back(level_info_t{
84 .stride_in_bits = child.stride_in_bits,
85 .num_tuples = child_inode->num_tuples(),
86 });
87 child_inode->for_sibling_leaves(level_info_stack, inode_offset_in_bits + child.offset_in_bits, callback);
88 level_info_stack.pop_back();
89 }
90 }
91
92 if (not leaves.empty())
93 callback(leaves, level_info_stack, inode_offset_in_bits);
94}
95
97namespace {
98
100std::ostream & indent(std::ostream &out, unsigned indentation)
101{
102 out << '\n' << std::string(4 * indentation, ' ');
103 return out;
104}
105
106}
107
108void DataLayout::INode::print(std::ostream &out, unsigned int indentation) const
109{
110 const std::size_t decimal_places = std::ceil(num_children() / Numeric::DECIMAL_TO_BINARY_DIGITS);
111
112 auto it = cbegin();
113 for (std::size_t i = 0; i != num_children(); ++i, ++it) {
114 M_insist(it != cend());
115
116 indent(out, indentation) << "[" << std::setw(decimal_places) << i << "]: ";
117
118 auto &child = *it;
119 if (auto child_leaf = cast<const Leaf>(child.ptr.get())) {
120 out << "Leaf " << child_leaf->index() << " of type " << *child_leaf->type() << " with bit offset "
121 << child.offset_in_bits << " and bit stride " << child.stride_in_bits;
122 } else {
123 auto child_inode = as<const INode>(child.ptr.get());
124 out << "INode of " << child_inode->num_tuples() << " tuple(s) with bit offset " << child.offset_in_bits
125 << " and bit stride " << child.stride_in_bits;
126 child_inode->print(out, indentation + 1);
127 }
128 }
129}
131
132
133/*----------------------------------------------------------------------------------------------------------------------
134 * DataLayout
135 *--------------------------------------------------------------------------------------------------------------------*/
136
138{
139 M_insist(inode_.num_children() == 0, "child already set");
140 return inode_.add_leaf(type, idx, 0, stride_in_bits);
141}
142
144{
145 M_insist(inode_.num_children() == 0, "child already set");
146 M_insist(num_tuples != 0, "the new INode must be large enough for at least one tuple");
147 M_insist(inode_.num_tuples() != 1 or stride_in_bits == 0, "no stride without repetition");
148 M_insist(stride_in_bits % 8 == 0, "the stride of the newly created INode must be byte aligned");
149
150 auto inode = new INode(num_tuples);
151 inode_.children_.emplace_back(INode::child_t{
152 .ptr = std::unique_ptr<DataLayout::Node>(as<Node>(inode)),
153 .offset_in_bits = 0,
154 .stride_in_bits = stride_in_bits,
155 });
156 return *inode;
157}
158
159void DataLayout::accept(ConstDataLayoutVisitor &v) const { v(*this); }
160
162{
163 level_info_stack_t level_info_stack;
164 inode_.for_sibling_leaves(level_info_stack, 0, callback);
165}
166
167
168/*======================================================================================================================
169 * Helper functions for SIMD support
170 *====================================================================================================================*/
171
172bool m::storage::supports_simd(const DataLayout &layout, const Schema &layout_schema, const Schema &tuple_schema)
173{
174 const bool needs_null_bitmap = [&]() {
175 for (auto &tuple_entry : tuple_schema) {
176 if (layout_schema[tuple_entry.id].second.nullable())
177 return true; // found an entry in `tuple_schema` that can be NULL according to `layout_schema`
178 }
179 return false; // no attribute in `tuple_schema` can be NULL according to `layout_schema`
180 }();
181
182 auto test_simd_support = [&](const DataLayout::INode& inode, auto &rec) {
183 if (inode.num_children() == 0)
184 return false; // invalid data layout
185
186 if (auto child_inode = cast<const DataLayout::INode>(inode[0].ptr.get());
187 inode.num_children() == 1 and child_inode)
188 {
189 return rec(*child_inode, rec); // recurse for single INode child
190 }
191
192 std::size_t num_simd_lanes = 1;
193 for (auto &child : inode) {
194 if (cast<const DataLayout::INode>(child.ptr.get()))
195 return false; // multiple INode children or mixed Leaf and INode children
196
197 auto &child_leaf = as<const DataLayout::Leaf>(*child.ptr);
198 const uint8_t bit_stride = child.stride_in_bits % 8;
199 if (child_leaf.index() == layout_schema.num_entries()) { // NULL bitmap
200 if (not needs_null_bitmap)
201 continue; // no NULL bitmap needed
202
203 if (bit_stride) {
204 return false; // NULL bitmap with bit stride currently not supported
205 } else {
206 if (tuple_schema.num_entries() > 64)
207 return false; // bytes containing a NULL bitmap must fit into scalar value
208 if (std::max(ceil_to_pow_2(tuple_schema.num_entries()), 8UL) != child.stride_in_bits)
209 return false; // distance between two NULL bits of a single attribute must be a power of 2
210 if (child.offset_in_bits % 8 != 0)
211 return false; // NULL bitmaps must not start with bit offset
212 }
213
214 num_simd_lanes = std::max<std::size_t>(num_simd_lanes, 16); // repeat 16 times for 128bit SIMD vectors
215 } else { // regular entry
216 auto tuple_it = tuple_schema.find(layout_schema[child_leaf.index()].id);
217 if (tuple_it == tuple_schema.end())
218 continue; // entry not contained in tuple schema
219 M_insist(*tuple_it->type == *child_leaf.type());
220
221 if (bit_stride) {
222 if (child.stride_in_bits != 1)
223 return false; // stride must be 1 bit
224 } else {
225 if (tuple_it->type->is_boolean() and child.stride_in_bits != 8)
226 return false; // booleans must be packed consecutively in bytes
227 if (tuple_it->type->is_character_sequence())
228 return false; // string SIMDfication currently not supported
229 }
230
231 const uint64_t size_in_bytes = (tuple_it->type->size() + 7) / 8;
232 num_simd_lanes = std::max<std::size_t>(num_simd_lanes, 16 / size_in_bytes); // repeat to fill 128bit SIMD vector
233 }
234 }
235
236 if (inode.num_tuples() % num_simd_lanes != 0)
237 return false; // number of tuples in INode must be a whole multiple of SIMD width
238
239 return true; // fallthrough
240 };
241 return test_simd_support(static_cast<const DataLayout::INode&>(layout), test_simd_support);
242}
243
245 const Schema &tuple_schema)
246{
247 M_insist(supports_simd(layout, layout_schema, tuple_schema),
248 "layout must support SIMD to retrieve its number of SIMD lanes");
249
250 std::size_t num_simd_lanes = 1;
251 for (auto &tuple_entry : tuple_schema) {
252 if (layout_schema[tuple_entry.id].second.nullable())
253 return 16; // repeat 16 times for 128bit SIMD vector; return immediately since this is the max. #lanes
254
255 const uint64_t size_in_bytes = (tuple_entry.type->size() + 7) / 8;
256 num_simd_lanes = std::max<std::size_t>(num_simd_lanes, 16 / size_in_bytes); // repeat to fill 128bit SIMD vector
257 }
258
259 return num_simd_lanes;
260}
#define M_insist(...)
Definition: macro.hpp:129
std::ostream & indent(std::ostream &out, unsigned indentation)
Start a new line with proper indentation.
Definition: DataLayout.cpp:100
const Schema const Schema & tuple_schema
Definition: DataLayout.hpp:255
std::size_t get_num_simd_lanes(const DataLayout &layout, const Schema &layout_schema, const Schema &tuple_schema)
Returns the number of SIMD lanes used for accessing tuples of schema tuple_schema in SIMDfied manner ...
Definition: DataLayout.cpp:244
const Schema & layout_schema
Definition: DataLayout.hpp:255
‍mutable namespace
Definition: Backend.hpp:10
and
Definition: enum_ops.hpp:12
static constexpr float DECIMAL_TO_BINARY_DIGITS
How many binary digits fit into a single decimal digit.
Definition: Type.hpp:400
A Schema represents a sequence of identifiers, optionally with a prefix, and their associated types.
Definition: Schema.hpp:39
std::size_t num_entries() const
Returns the number of entries in this Schema.
Definition: Schema.hpp:124
iterator end()
Definition: Schema.hpp:117
iterator find(const Identifier &id)
Returns an iterator to the entry with the given Identifier id, or end() if no such entry exists.
Definition: Schema.hpp:129
This class represents types in the SQL type system.
Definition: Type.hpp:46
bool is_boolean() const
Definition: Type.hpp:75
bool is_bitmap() const
Definition: Type.hpp:76
‍a child Node and its relative offset and stride within the containing INode
Definition: DataLayout.hpp:104
An internal node of the recursive data layout model.
Definition: DataLayout.hpp:99
std::vector< child_t > children_
‍the child Nodes of this INode
Definition: DataLayout.hpp:142
void print(std::ostream &out, unsigned indentation=0) const
Definition: DataLayout.cpp:108
size_type num_tuples() const override
‍returns the number of tuples represented by an instance of this node
Definition: DataLayout.hpp:153
Leaf & add_leaf(const m::Type *type, size_type idx, uint64_t offset_in_bits, uint64_t stride_in_bits)
Creates a Leaf and adds it as a child to this INode.
Definition: DataLayout.cpp:29
void for_sibling_leaves(level_info_stack_t &level_info_stack, uint64_t inode_offset_in_bits, const callback_leaves_t &callback) const
Definition: DataLayout.cpp:68
void accept(ConstDataLayoutVisitor &v) const override
Definition: DataLayout.cpp:66
INode & add_inode(size_type num_tuples, uint64_t offset_in_bits, uint64_t stride_in_bits)
Creates an INode and adds it as a child to this INode.
Definition: DataLayout.cpp:47
size_type num_children() const
‍returns the number of child Nodes of this INode
Definition: DataLayout.hpp:155
The Leaf represents exactly one attribue.
Definition: DataLayout.hpp:73
void accept(ConstDataLayoutVisitor &v) const override
Definition: DataLayout.cpp:22
virtual size_type num_tuples() const =0
‍returns the number of tuples represented by an instance of this node
‍combines information of a single leaf for for_sibling_leaves()
Definition: DataLayout.hpp:38
‍combines information of a single internal level inside the DataLayout, used by for_sibling_leaves()
Definition: DataLayout.hpp:46
uint64_t stride_in_bits
‍the stride of instances of this level
Definition: DataLayout.hpp:48
Models how data is laid out in a linear address space.
Definition: DataLayout.hpp:29
INode inode_
‍use an INode to store a single child, allowing us to exploit INode abstractions within DataLayout
Definition: DataLayout.hpp:191
uint64_t stride_in_bits() const
‍return the stride (in bits) of the single child of the DataLayout
Definition: DataLayout.hpp:207
const Node & child() const
‍returns a reference to the single child of this DataLayout
Definition: DataLayout.hpp:209
INode & add_inode(size_type num_tuples, uint64_t stride_in_bits)
Creates an INode and adds it as a child to this DataLayout's internal INode.
Definition: DataLayout.cpp:143
size_type num_tuples() const
‍returns the number of tuples laid out by this DataLayout; must not be called when not is_finite()
Definition: DataLayout.hpp:202
std::vector< level_info_t > level_info_stack_t
Definition: DataLayout.hpp:53
Leaf & add_leaf(const m::Type *type, size_type idx, uint64_t stride_in_bits)
Creates a Leaf and adds it as a child to this DataLayout's internal INode.
Definition: DataLayout.cpp:137
std::function< void(const std::vector< leaf_info_t > &leaves, const level_info_stack_t &levels, uint64_t inode_offset_in_bits)> callback_leaves_t
Definition: DataLayout.hpp:57
void for_sibling_leaves(callback_leaves_t callback) const
Definition: DataLayout.cpp:161
void accept(ConstDataLayoutVisitor &v) const
Definition: DataLayout.cpp:159