13std::ostream & m::storage::operator<<(std::ostream &out,
const DataLayoutFactory &factory)
38static
void add_storage_args()
46 "--no-attribute-reordering",
47 "do not reorder attributes when creating data layouts, e.g. to minimize padding",
48 [](bool){ options::attribute_reordering =
false; }
53 "--remove-null-bitmap",
54 "remove the NULL bitmap in all created data layouts",
55 [](bool){ options::remove_null_bitmap =
true; }
60 "--pax-pack-one-tuple-less",
61 "pack one tuple less than possible into PAX blocks; only used for benchmarking purposes",
62 [](bool){ options::pax_pack_one_tuple_less =
true; }
72std::unique_ptr<std::size_t[]>
76 auto indices = std::make_unique<std::size_t[]>(types.size());
77 std::iota(indices.get(), indices.get() + types.size(), 0);
79 if (options::attribute_reordering) {
81 std::stable_sort(indices.get(), indices.get() + types.size(), [&](std::size_t left, std::size_t right) {
82 return types[left]->alignment() > types[right]->alignment();
91 M_insist(not types.empty(),
"cannot make layout for zero types");
94 uint64_t offsets[types.size()];
97 uint64_t offset_in_bits = 0;
98 uint64_t alignment_in_bits = 8;
100 for (std::size_t idx = 0; idx != types.size(); ++idx) {
101 const auto mapped_idx = indices[idx];
102 offsets[mapped_idx] = offset_in_bits;
103 offset_in_bits += types[mapped_idx]->size();
104 alignment_in_bits = std::max(alignment_in_bits, types[mapped_idx]->alignment());
107 const uint64_t null_bitmap_offset = offset_in_bits;
110 if (not options::remove_null_bitmap)
111 offset_in_bits += types.size();
112 if (uint64_t rem = offset_in_bits % alignment_in_bits; rem)
113 offset_in_bits += alignment_in_bits - rem;
114 const uint64_t row_size_in_bits = offset_in_bits;
118 auto &row = layout.
add_inode(1, row_size_in_bits);
119 for (std::size_t idx = 0; idx != types.size(); ++idx)
120 row.
add_leaf(types[idx], idx, offsets[idx], 0);
121 if (not options::remove_null_bitmap) {
135 M_insist(not types.empty(),
"cannot make layout for zero types");
138 uint64_t offsets[types.size() + 1];
141 uint64_t offset_in_bits = 0;
142 uint64_t min_size_in_bytes = std::numeric_limits<uint64_t>::max();
143 uint64_t alignment_in_bits = 8;
144 std::size_t num_not_byte_aligned = 0;
146 for (std::size_t idx = 0; idx != types.size(); ++idx) {
147 const auto mapped_idx = indices[idx];
148 offsets[mapped_idx] = offset_in_bits;
149 offset_in_bits += types[mapped_idx]->size();
150 min_size_in_bytes = std::min(min_size_in_bytes, (types[mapped_idx]->size() + 7) / 8);
151 alignment_in_bits = std::max(alignment_in_bits, types[mapped_idx]->alignment());
152 if (types[mapped_idx]->size() % 8)
153 ++num_not_byte_aligned;
157 const uint64_t null_bitmap_size_in_bits =
158 options::remove_null_bitmap ? 0 : std::max(ceil_to_pow_2(types.size()), 8UL);
159 offsets[types.size()] = offset_in_bits;
160 if (null_bitmap_size_in_bits % 8)
161 ++num_not_byte_aligned;
164 const auto num_simd_lanes = std::max<std::size_t>(1, 16 / min_size_in_bytes);
165 std::size_t num_rows_per_block, num_blocks_per_row;
168 if (num_rows_per_block > num_simd_lanes)
171 num_blocks_per_row = 1;
173 const uint64_t row_size_in_bits = offsets[types.size()] + null_bitmap_size_in_bits;
177 num_rows_per_block = std::max<std::size_t>(1, (
num_bytes_ * 8 - num_not_byte_aligned * 7) / row_size_in_bits);
178 if (num_rows_per_block > num_simd_lanes)
180 (num_rows_per_block / num_simd_lanes) * num_simd_lanes;
181 if (options::pax_pack_one_tuple_less
and num_rows_per_block > 1)
182 --num_rows_per_block;
187 uint64_t running_padding = 0;
188 for (std::size_t idx = 0; idx != types.size(); ++idx) {
189 const auto mapped_idx = indices[idx];
190 offsets[mapped_idx] = offsets[mapped_idx] * num_rows_per_block + running_padding;
191 M_insist(offsets[mapped_idx] % 8 == 0,
"attribute column must be byte aligned");
192 if (uint64_t bit_offset = (types[mapped_idx]->size() * num_rows_per_block) % 8; bit_offset)
193 running_padding += 8UL - bit_offset;
195 offsets[types.size()] = offsets[types.size()] * num_rows_per_block + running_padding;
198 uint64_t block_size_in_bits;
200 block_size_in_bits = offsets[types.size()] + null_bitmap_size_in_bits * num_rows_per_block;
201 if (uint64_t alignment_offset = block_size_in_bits % alignment_in_bits)
202 block_size_in_bits += alignment_in_bits - alignment_offset;
207 M_insist(offsets[types.size()] % 8 == 0,
"NULL bitmap column must be byte aligned");
208 M_insist(offsets[types.size()] + null_bitmap_size_in_bits * num_rows_per_block <=
209 block_size_in_bits * num_blocks_per_row,
210 "computed block layout must not exceed block size");
214 auto &pax_block = layout.
add_inode(num_rows_per_block, num_blocks_per_row * block_size_in_bits);
215 for (std::size_t idx = 0; idx != types.size(); ++idx)
216 pax_block.
add_leaf(types[idx], idx, offsets[idx], types[idx]->size());
217 if (not options::remove_null_bitmap) {
221 offsets[types.size()],
222 null_bitmap_size_in_bits
230static
void register_data_layouts()
233#define REGISTER_PAX_BYTES(NAME, BLOCK_SIZE, DESCRIPTION) \
234 C.register_data_layout(C.pool(#NAME), std::make_unique<PAXLayoutFactory>(PAXLayoutFactory::NBytes, BLOCK_SIZE), DESCRIPTION)
235#define REGISTER_PAX_TUPLES(NAME, BLOCK_SIZE, DESCRIPTION) \
236 C.register_data_layout(C.pool(#NAME), std::make_unique<PAXLayoutFactory>(PAXLayoutFactory::NTuples, BLOCK_SIZE), DESCRIPTION)
237 REGISTER_PAX_BYTES(PAX4M, 1UL << 22,
"stores attributes using PAX layout with 4MiB blocks");
238 REGISTER_PAX_BYTES(PAX4K, 1UL << 12,
"stores attributes using PAX layout with 4KiB blocks");
239 REGISTER_PAX_BYTES(PAX64K, 1UL << 16,
"stores attributes using PAX layout with 64KiB blocks");
240 REGISTER_PAX_BYTES(PAX512K, 1UL << 19,
"stores attributes using PAX layout with 512KiB blocks");
241 REGISTER_PAX_BYTES(PAX64M, 1UL << 26,
"stores attributes using PAX layout with 64MiB blocks");
242 REGISTER_PAX_TUPLES(PAX16Tup, 16,
"stores attributes using PAX layout with blocks for 16 tuples");
243 REGISTER_PAX_TUPLES(PAX128Tup, 128,
"stores attributes using PAX layout with blocks for 128 tuples");
244 REGISTER_PAX_TUPLES(PAX1024Tup, 1024,
"stores attributes using PAX layout with blocks for 1024 tuples");
245 C.
register_data_layout(C.
pool(
"Row"), std::make_unique<RowLayoutFactory>(),
"stores attributes in row-major order");
std::unique_ptr< std::size_t[]> compute_attribute_order(const std::vector< const Type * > &types)
Computes the order for the attributes with the given types `types` and returns this permutation as an array of indices.
#define REGISTER_PAX_TUPLES(NAME, BLOCK_SIZE, DESCRIPTION)
#define REGISTER_PAX_BYTES(NAME, BLOCK_SIZE, DESCRIPTION)
__attribute__((constructor(202))) static void register_interpreter()
void add(const char *group_name, const char *short_name, const char *long_name, const char *description, Callback &&callback)
Adds a new group option to the ArgParser.
bool attribute_reordering
Whether to reorder attributes when creating data layouts.
bool remove_null_bitmap
Whether to remove the NULL bitmap in all created data layouts.
bool pax_pack_one_tuple_less
Whether to pack one tuple less than theoretically possible in a created PAX data layout.
Command-line options for the HeuristicSearchPlanEnumerator.
The catalog contains all Databases and keeps track of all meta information of the database system.
ThreadSafePooledString pool(const char *str) const
Creates an internalized copy of the string str by adding it to the internal StringPool.
static Catalog & Get()
Return a reference to the single Catalog instance.
void register_data_layout(ThreadSafePooledString name, std::unique_ptr< storage::DataLayoutFactory > data_layout, const char *description=nullptr)
Registers a new DataLayoutFactory with the given name.
m::ArgParser & arg_parser()
static Pooled< Bitmap > Get_Bitmap(category_t category, std::size_t length)
Returns a Bitmap type of the given category and length.
This is an interface for factories that compute particular DataLayouts for a given sequence of Types,...
virtual void print(std::ostream &out) const =0
Leaf & add_leaf(const m::Type *type, size_type idx, uint64_t offset_in_bits, uint64_t stride_in_bits)
Creates a Leaf and adds it as a child to this INode.
Models how data is laid out in a linear address space.
INode & add_inode(size_type num_tuples, uint64_t stride_in_bits)
Creates an INode and adds it as a child to this DataLayout's internal INode.
DataLayout make(std::vector< const Type * > types, std::size_t num_tuples=0) const override
Returns a DataLayout for the given types and length num_tuples (0 means infinite layout).
DataLayout make(std::vector< const Type * > types, std::size_t num_tuples=0) const override
Returns a DataLayout for the given types and length num_tuples (0 means infinite layout).