mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
DataLayoutFactory.cpp
Go to the documentation of this file.
2
3#include <memory>
6#include <numeric>
7
8
9using namespace m;
10using namespace m::storage;
11
13std::ostream & m::storage::operator<<(std::ostream &out, const DataLayoutFactory &factory)
14{
15 factory.print(out);
16 return out;
17}
18void DataLayoutFactory::dump(std::ostream &out) const { out << *this << std::endl; }
19void DataLayoutFactory::dump() const { dump(std::cerr); }
21
22namespace {
23
24namespace options {
25
28
30bool remove_null_bitmap = false;
31
34
35}
36
37__attribute__((constructor(201)))
38static void add_storage_args()
39{
40 Catalog &C = Catalog::Get();
41
42 /*----- Command-line arguments -----*/
43 C.arg_parser().add<bool>(
44 /* group= */ "Storage",
45 /* short= */ nullptr,
46 /* long= */ "--no-attribute-reordering",
47 /* description= */ "do not reorder attributes when creating data layouts, e.g. to minimize padding",
48 /* callback= */ [](bool){ options::attribute_reordering = false; }
49 );
50 C.arg_parser().add<bool>(
51 /* group= */ "Storage",
52 /* short= */ nullptr,
53 /* long= */ "--remove-null-bitmap",
54 /* description= */ "remove the NULL bitmap in all created data layouts",
55 /* callback= */ [](bool){ options::remove_null_bitmap = true; }
56 );
57 C.arg_parser().add<bool>(
58 /* group= */ "Storage",
59 /* short= */ nullptr,
60 /* long= */ "--pax-pack-one-tuple-less",
61 /* description= */ "pack one tuple less than possible into PAX blocks; only used for benchmarking purposes",
62 /* callback= */ [](bool){ options::pax_pack_one_tuple_less = true; }
63 );
64}
65
66}
67
68
72std::unique_ptr<std::size_t[]>
73compute_attribute_order(const std::vector<const Type*> &types)
74{
75 /*----- Collect all indices. -----*/
76 auto indices = std::make_unique<std::size_t[]>(types.size());
77 std::iota(indices.get(), indices.get() + types.size(), 0);
78
79 if (options::attribute_reordering) {
80 /*----- Sort indices by alignment. -----*/
81 std::stable_sort(indices.get(), indices.get() + types.size(), [&](std::size_t left, std::size_t right) {
82 return types[left]->alignment() > types[right]->alignment();
83 });
84 }
85
86 return indices;
87}
88
89DataLayout RowLayoutFactory::make(std::vector<const Type*> types, std::size_t num_tuples) const
90{
91 M_insist(not types.empty(), "cannot make layout for zero types");
92
93 auto indices = compute_attribute_order(types);
94 uint64_t offsets[types.size()]; // in bits
95
96 /*----- Compute offsets. -----*/
97 uint64_t offset_in_bits = 0;
98 uint64_t alignment_in_bits = 8;
99
100 for (std::size_t idx = 0; idx != types.size(); ++idx) {
101 const auto mapped_idx = indices[idx];
102 offsets[mapped_idx] = offset_in_bits;
103 offset_in_bits += types[mapped_idx]->size();
104 alignment_in_bits = std::max(alignment_in_bits, types[mapped_idx]->alignment());
105 }
106
107 const uint64_t null_bitmap_offset = offset_in_bits;
108
109 /*----- Compute row size with padding. -----*/
110 if (not options::remove_null_bitmap)
111 offset_in_bits += types.size(); // space for NULL bitmap
112 if (uint64_t rem = offset_in_bits % alignment_in_bits; rem)
113 offset_in_bits += alignment_in_bits - rem;
114 const uint64_t row_size_in_bits = offset_in_bits;
115
116 /*----- Construct DataLayout. -----*/
117 DataLayout layout(num_tuples);
118 auto &row = layout.add_inode(1, row_size_in_bits);
119 for (std::size_t idx = 0; idx != types.size(); ++idx)
120 row.add_leaf(types[idx], idx, offsets[idx], 0); // add attribute
121 if (not options::remove_null_bitmap) {
122 row.add_leaf( // add NULL bitmap
123 /* type= */ Type::Get_Bitmap(Type::TY_Vector, types.size()),
124 /* idx= */ types.size(),
125 /* offset_in_bits= */ null_bitmap_offset,
126 /* stride_in_bits= */ 0
127 );
128 }
129
130 return layout;
131}
132
133DataLayout PAXLayoutFactory::make(std::vector<const Type*> types, std::size_t num_tuples) const
134{
135 M_insist(not types.empty(), "cannot make layout for zero types");
136
137 auto indices = compute_attribute_order(types);
138 uint64_t offsets[types.size() + 1]; // in bits
139
140 /*----- Compute attribute offsets in a virtual row. -----*/
141 uint64_t offset_in_bits = 0;
142 uint64_t min_size_in_bytes = std::numeric_limits<uint64_t>::max();
143 uint64_t alignment_in_bits = 8;
144 std::size_t num_not_byte_aligned = 0;
145
146 for (std::size_t idx = 0; idx != types.size(); ++idx) {
147 const auto mapped_idx = indices[idx];
148 offsets[mapped_idx] = offset_in_bits;
149 offset_in_bits += types[mapped_idx]->size();
150 min_size_in_bytes = std::min(min_size_in_bytes, (types[mapped_idx]->size() + 7) / 8);
151 alignment_in_bits = std::max(alignment_in_bits, types[mapped_idx]->alignment());
152 if (types[mapped_idx]->size() % 8)
153 ++num_not_byte_aligned;
154 }
155
156 /*----- Compute NULL bitmap offset in a virtual row. -----*/
157 const uint64_t null_bitmap_size_in_bits =
158 options::remove_null_bitmap ? 0 : std::max(ceil_to_pow_2(types.size()), 8UL); // add padding to support SIMDfication
159 offsets[types.size()] = offset_in_bits;
160 if (null_bitmap_size_in_bits % 8)
161 ++num_not_byte_aligned;
162
163 /*----- Compute number of rows per block and number of blocks per row. -----*/
164 const auto num_simd_lanes = std::max<std::size_t>(1, 16 / min_size_in_bytes); // possible number of SIMD lanes
165 std::size_t num_rows_per_block, num_blocks_per_row;
166 if (NTuples == option_) {
167 num_rows_per_block = num_tuples_;
168 if (num_rows_per_block > num_simd_lanes)
169 num_rows_per_block =
170 (num_tuples_ / num_simd_lanes) * num_simd_lanes; // floor to multiple of possible number of SIMD lanes
171 num_blocks_per_row = 1;
172 } else {
173 const uint64_t row_size_in_bits = offsets[types.size()] + null_bitmap_size_in_bits; // space for NULL bitmap
174 /* Compute number of rows within a PAX block. Consider worst case padding of 7 bits (because each column within
175 * a PAX block must be byte aligned) for every possibly not byte-aligned attribute column. Null bitmap column is
176 * ignored since it is the last column. */
177 num_rows_per_block = std::max<std::size_t>(1, (num_bytes_ * 8 - num_not_byte_aligned * 7) / row_size_in_bits);
178 if (num_rows_per_block > num_simd_lanes)
179 num_rows_per_block =
180 (num_rows_per_block / num_simd_lanes) * num_simd_lanes; // floor to multiple of possible number of SIMD lanes
181 if (options::pax_pack_one_tuple_less and num_rows_per_block > 1)
182 --num_rows_per_block;
183 num_blocks_per_row = (row_size_in_bits + num_bytes_ * 8 - 1UL) / (num_bytes_ * 8);
184 }
185
186 /*----- Compute column offsets. -----*/
187 uint64_t running_padding = 0;
188 for (std::size_t idx = 0; idx != types.size(); ++idx) {
189 const auto mapped_idx = indices[idx];
190 offsets[mapped_idx] = offsets[mapped_idx] * num_rows_per_block + running_padding;
191 M_insist(offsets[mapped_idx] % 8 == 0, "attribute column must be byte aligned");
192 if (uint64_t bit_offset = (types[mapped_idx]->size() * num_rows_per_block) % 8; bit_offset)
193 running_padding += 8UL - bit_offset;
194 }
195 offsets[types.size()] = offsets[types.size()] * num_rows_per_block + running_padding;
196
197 /*----- Compute block size. -----*/
198 uint64_t block_size_in_bits;
199 if (NTuples == option_) {
200 block_size_in_bits = offsets[types.size()] + null_bitmap_size_in_bits * num_rows_per_block;
201 if (uint64_t alignment_offset = block_size_in_bits % alignment_in_bits)
202 block_size_in_bits += alignment_in_bits - alignment_offset;
203 } else {
204 block_size_in_bits = num_bytes_ * 8;
205 }
206
207 M_insist(offsets[types.size()] % 8 == 0, "NULL bitmap column must be byte aligned");
208 M_insist(offsets[types.size()] + null_bitmap_size_in_bits * num_rows_per_block <=
209 block_size_in_bits * num_blocks_per_row,
210 "computed block layout must not exceed block size");
211
212 /*----- Construct DataLayout. -----*/
213 DataLayout layout(num_tuples);
214 auto &pax_block = layout.add_inode(num_rows_per_block, num_blocks_per_row * block_size_in_bits);
215 for (std::size_t idx = 0; idx != types.size(); ++idx)
216 pax_block.add_leaf(types[idx], idx, offsets[idx], types[idx]->size());
217 if (not options::remove_null_bitmap) {
218 pax_block.add_leaf( // add NULL bitmap
219 /* type= */ Type::Get_Bitmap(Type::TY_Vector, types.size()),
220 /* idx= */ types.size(),
221 /* offset_in_bits= */ offsets[types.size()],
222 /* stride_in_bits= */ null_bitmap_size_in_bits
223 );
224 }
225
226 return layout;
227}
228
229__attribute__((constructor(202)))
230static void register_data_layouts()
231{
232 Catalog &C = Catalog::Get();
233#define REGISTER_PAX_BYTES(NAME, BLOCK_SIZE, DESCRIPTION) \
234 C.register_data_layout(C.pool(#NAME), std::make_unique<PAXLayoutFactory>(PAXLayoutFactory::NBytes, BLOCK_SIZE), DESCRIPTION)
235#define REGISTER_PAX_TUPLES(NAME, BLOCK_SIZE, DESCRIPTION) \
236 C.register_data_layout(C.pool(#NAME), std::make_unique<PAXLayoutFactory>(PAXLayoutFactory::NTuples, BLOCK_SIZE), DESCRIPTION)
237 REGISTER_PAX_BYTES(PAX4M, 1UL << 22, "stores attributes using PAX layout with 4MiB blocks"); // default
238 REGISTER_PAX_BYTES(PAX4K, 1UL << 12, "stores attributes using PAX layout with 4KiB blocks");
239 REGISTER_PAX_BYTES(PAX64K, 1UL << 16, "stores attributes using PAX layout with 64KiB blocks");
240 REGISTER_PAX_BYTES(PAX512K, 1UL << 19, "stores attributes using PAX layout with 512KiB blocks");
241 REGISTER_PAX_BYTES(PAX64M, 1UL << 26, "stores attributes using PAX layout with 64MiB blocks");
242 REGISTER_PAX_TUPLES(PAX16Tup, 16, "stores attributes using PAX layout with blocks for 16 tuples");
243 REGISTER_PAX_TUPLES(PAX128Tup, 128, "stores attributes using PAX layout with blocks for 128 tuples");
244 REGISTER_PAX_TUPLES(PAX1024Tup, 1024, "stores attributes using PAX layout with blocks for 1024 tuples");
245 C.register_data_layout(C.pool("Row"), std::make_unique<RowLayoutFactory>(), "stores attributes in row-major order");
246#undef REGISTER_PAX
247}
std::unique_ptr< std::size_t[]> compute_attribute_order(const std::vector< const Type * > &types)
Computes the order for attributes of types types and returns this permutation as array of indices.
#define REGISTER_PAX_TUPLES(NAME, BLOCK_SIZE, DESCRIPTION)
#define REGISTER_PAX_BYTES(NAME, BLOCK_SIZE, DESCRIPTION)
__attribute__((constructor(202))) static void register_interpreter()
void add(const char *group_name, const char *short_name, const char *long_name, const char *description, Callback &&callback)
Adds a new group option to the ArgParser.
Definition: ArgParser.hpp:84
#define M_insist(...)
Definition: macro.hpp:129
bool attribute_reordering
Whether to reorder attributes when creating data layouts.
bool remove_null_bitmap
Whether to remove the NULL bitmap in all created data layouts.
bool pax_pack_one_tuple_less
Whether to pack one tuple less than theoretically possible in a created PAX data layout.
‍mutable namespace
Definition: Backend.hpp:10
and
Definition: enum_ops.hpp:12
‍command-line options for the HeuristicSearchPlanEnumerator
Definition: V8Engine.cpp:44
The catalog contains all Databases and keeps track of all meta information of the database system.
Definition: Catalog.hpp:215
ThreadSafePooledString pool(const char *str) const
Creates an internalized copy of the string str by adding it to the internal StringPool.
Definition: Catalog.hpp:274
static Catalog & Get()
Return a reference to the single Catalog instance.
void register_data_layout(ThreadSafePooledString name, std::unique_ptr< storage::DataLayoutFactory > data_layout, const char *description=nullptr)
Registers a new DataLayoutFactory with the given name.
Definition: Catalog.hpp:369
m::ArgParser & arg_parser()
Definition: Catalog.hpp:253
static Pooled< Bitmap > Get_Bitmap(category_t category, std::size_t length)
Returns a Bitmap type of the given category and length.
Definition: Type.cpp:70
This is an interface for factories that compute particular DataLayouts for a given sequence of Types,...
virtual void print(std::ostream &out) const =0
Leaf & add_leaf(const m::Type *type, size_type idx, uint64_t offset_in_bits, uint64_t stride_in_bits)
Creates a Leaf and adds it as a child to this INode.
Definition: DataLayout.cpp:29
Models how data is laid out in a linear address space.
Definition: DataLayout.hpp:29
INode & add_inode(size_type num_tuples, uint64_t stride_in_bits)
Creates an INode and adds it as a child to this DataLayout's internal INode.
Definition: DataLayout.cpp:143
DataLayout make(std::vector< const Type * > types, std::size_t num_tuples=0) const override
Returns a DataLayout for the given types and length num_tuples (0 means infinite layout).
DataLayout make(std::vector< const Type * > types, std::size_t num_tuples=0) const override
Returns a DataLayout for the given types and length num_tuples (0 means infinite layout).