mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
store_manip.cpp
Go to the documentation of this file.
1#include "store_manip.hpp"
2
4#include "util/datagen.hpp"
5#include <algorithm>
6#include <chrono>
7#include <cstring>
8#include <limits>
10#include <mutable/Options.hpp>
11#include <mutable/util/fn.hpp>
12#include <random>
13#include <unordered_set>
14
15
16using namespace m;
17
18
19//======================================================================================================================
20// HELPER FUNCTIONS
21//======================================================================================================================
22
23namespace {
24
26template<typename T>
27std::vector<T> generate_distinct_numbers(const T min, const T max, const std::size_t count)
28{
29 using uniform_distibution_t = std::conditional_t<std::is_integral_v<T>,
30 std::uniform_int_distribution<T>,
31 std::uniform_real_distribution<T>>;
32 static_assert(std::is_arithmetic_v<T>, "T must be an arithmetic type");
33 if (std::is_integral_v<T>)
34 M_insist(T(max - count) > min, "range is too small to provide enough distinct values");
35
36 std::vector<T> values;
37 values.reserve(count);
38
39 std::unordered_set<T> taken;
40 T counter = max - T(count);
41
42 std::mt19937_64 g(0);
43 uniform_distibution_t dist(min, counter);
44
45 for (std::size_t i = 0; i != count; ++i) {
46 T val = dist(g);
47 auto[_, success] = taken.insert(val);
48 if (success) {
49 values.push_back(val);
50 } else {
51 values.push_back(++counter);
52 }
53 }
54
55 return values;
56}
57
60template<typename T>
61void generate_numeric_column(T *column_ptr, std::size_t num_distinct_values, std::size_t begin, std::size_t end)
62{
63 static_assert(std::is_arithmetic_v<T>, "T must be an arithmetic type");
64 M_insist(begin < end, "must set at least one row");
65
66 const auto count = end - begin;
67
68 /* Generate distinct values. */
69 std::vector<T> values;
70 if (std::is_integral_v<T>)
71 values = datagen::generate_uniform_distinct_numbers<T>(
72 /* min= */ std::numeric_limits<T>::lowest() + std::is_signed_v<T>,
73 /* max= */ std::numeric_limits<T>::max(),
74 /* count= */ num_distinct_values
75 );
76 else
77 values = datagen::generate_uniform_distinct_numbers<T>(T(0), T(1), num_distinct_values);
78 M_insist(values.size() == num_distinct_values);
79
80 /* Write distinct values repeatedly in arbitrary order to column. */
81 auto ptr = column_ptr + begin;
82 std::mt19937_64 g(0);
83 for (std::size_t i = 0; i != count; ) {
84 /* Shuffle the vector before writing its values to the column. */
85 std::shuffle(values.begin(), values.end(), g);
86 for (auto v : values) {
87 *ptr++ = v;
88 ++i;
89 if (i == count)
90 goto exit;
91 }
92 }
93exit:
94 M_insist(ptr - column_ptr == long(count), "incorrect number of elements written");
95}
96
97template<typename T>
98void generate_correlated_numeric_columns(T *left_ptr, T *right_ptr, std::size_t num_distinct_values_left,
99 std::size_t num_distinct_values_right, std::size_t count_left,
100 std::size_t count_right, std::size_t num_distinct_values_matching)
101{
102 static_assert(std::is_arithmetic_v<T>, "T must be an arithmetic type");
103 M_insist(num_distinct_values_left >= num_distinct_values_matching,
104 "num_distinct_values_left must be larger than num_distinct_values_matching");
105 M_insist(num_distinct_values_right >= num_distinct_values_matching,
106 "num_distinct_values_right must be larger than num_distinct_values_matching");
107
108 /* Generate a single set of distinct values to draw from to fill left and right side, with an overlap of exactly
109 * `num_distinct_values_matching` values. */
110 const std::vector<T> values = generate_distinct_numbers<T>(
111 /* min= */ std::numeric_limits<T>::lowest(),
112 /* max= */ std::numeric_limits<T>::max(),
113 /* count= */ num_distinct_values_left + num_distinct_values_right - num_distinct_values_matching
114 );
115
116 std::mt19937_64 g(0);
117
118 /* Fill `store_left`. */
119 {
120 /* Draw first `num_distinct_values_left` values from `values`. */
121 std::vector<T> values_left(values.begin(), values.begin() + num_distinct_values_left);
122
123 /* Write distinct values repeatedly in arbitrary order to column. */
124 auto left = left_ptr;
125 for (std::size_t i = 0; i != count_left; ) {
126 /* Shuffle the vector before writing its values to the column. */
127 std::shuffle(values_left.begin(), values_left.end(), g);
128 for (auto v : values_left) {
129 *left++ = v;
130 ++i;
131 if (i == count_left)
132 goto exit_left;
133 }
134 }
135exit_left:
136 M_insist(left - left_ptr == long(count_left),
137 "incorrect number of elements written to left store");
138 }
139
140 /* Fill `store_right`. */
141 {
142 /* Draw last `num_distinct_values_right` values from `values`. */
143 std::vector<T> values_right(values.rbegin(), values.rbegin() + num_distinct_values_right);
144
145 /* Write distinct values repeatedly in arbitrary order to column. */
146 auto right = right_ptr;
147 for (std::size_t i = 0; i != count_right; ) {
148 /* Shuffle the vector before writing its values to the column. */
149 std::shuffle(values_right.begin(), values_right.end(), g);
150 for (auto v : values_right) {
151 *right++ = v;
152 ++i;
153 if (i == count_right)
154 goto exit_right;
155 }
156 }
157exit_right:
158 M_insist(right - right_ptr == long(count_right),
159 "incorrect number of elements written to right store");
160 }
161}
162
163}
164
165void m::set_all_null(uint8_t *column_ptr, std::size_t num_attrs, std::size_t begin, std::size_t end)
166{
167 M_insist(begin < end, "must set at least one row");
168
169 auto begin_bytes = (num_attrs * begin) / 8U;
170 const auto begin_bits = (num_attrs * begin) % 8U;
171 const auto end_bytes = (num_attrs * end) / 8U;
172 const auto end_bits = (num_attrs * end) % 8U;
173
174 M_insist(begin_bytes != end_bytes, "the `begin`-th row and `end`-th row must be in different bytes");
175
176 /* Set the `begin_bits` most significant bits to 1. */
177 if (begin_bits) {
178 *(column_ptr + begin_bytes) |= ~((1U << (8U - begin_bits)) - 1U); // set respective bits to 1
179 ++begin_bytes; // advance to first byte which can be written entirely
180 }
181
182 /* Set the `end_bits` least significant bits to 1. */
183 if (end_bits)
184 *(column_ptr + end_bytes) |= (1U << end_bits) - 1U; // set respective bits to 1
185
186 const auto num_bytes = end_bytes - begin_bytes;
187 std::memset(column_ptr + begin_bytes, uint8_t(~0), num_bytes);
188}
189
190void m::set_all_not_null(uint8_t *column_ptr, std::size_t num_attrs, std::size_t begin, std::size_t end)
191{
192 M_insist(begin < end, "must set at least one row");
193
194 auto begin_bytes = (num_attrs * begin) / 8U;
195 const auto begin_bits = (num_attrs * begin) % 8U;
196 const auto end_bytes = (num_attrs * end) / 8U;
197 const auto end_bits = (num_attrs * end) % 8U;
198
199 M_insist(begin_bytes != end_bytes, "the `begin`-th row and `end`-th row must be in different bytes");
200
201 /* Set the `begin_bits` most significant bits to 0. */
202 if (begin_bits) {
203 *(column_ptr + begin_bytes) &= (1U << begin_bits) - 1U; // set respective bits to 0
204 ++begin_bytes; // advance to first byte which can be written entirely
205 }
206
207 /* Set the `end_bits` least significant bits to 0. */
208 if (end_bits)
209 *(column_ptr + end_bytes) &= ~((1U << (8U - end_bits)) - 1U); // set respective bits to 0
210
211 const auto num_bytes = end_bytes - begin_bytes;
212 std::memset(column_ptr + begin_bytes, uint8_t(0), num_bytes);
213}
214
215
216void m::generate_primary_keys(void *column_ptr, const Type &type, std::size_t begin, std::size_t end)
217{
218 M_insist(begin < end, "must set at least one row");
219 M_insist(type.is_integral(), "primary key columns must be of integral type");
220
221 auto &n = as<const Numeric>(type);
222 switch (n.size()) {
223 default:
224 M_unreachable("unsupported size of primary key");
225
226 case 32: {
227 auto ptr = reinterpret_cast<int32_t*>(column_ptr);
228 std::iota<int32_t*, int32_t>(ptr + begin, ptr + end, begin);
229 break;
230 }
231
232 case 64: {
233 auto ptr = reinterpret_cast<int64_t*>(column_ptr);
234 std::iota<int64_t*, int64_t>(ptr + begin, ptr + end, begin);
235 break;
236 }
237 }
238}
239
240void m::generate_column_data(void *column_ptr, const Attribute &attr, std::size_t num_distinct_values,
241 std::size_t begin, std::size_t end)
242{
243 M_insist(begin < end, "must set at least one row");
244
245 if (streq(*attr.name, "id")) { // generate primary key
246 generate_primary_keys(column_ptr, *attr.type, begin, end);
247 } else if (auto n = cast<const Numeric>(attr.type)) {
248 switch (n->kind) {
249 case m::Numeric::N_Int:
250 switch (n->size()) {
251#define CASE(N) case N: \
252 ::generate_numeric_column(reinterpret_cast<int##N##_t*>(column_ptr), num_distinct_values, begin, end); \
253 break
254 CASE(8);
255 CASE(16);
256 CASE(32);
257 CASE(64);
258#undef CASE
259 }
260 break;
261 case m::Numeric::N_Float:
262 switch (n->size()) {
263 case 32:
264 ::generate_numeric_column(reinterpret_cast<float*>(column_ptr), num_distinct_values, begin, end);
265 break;
266 case 64:
267 ::generate_numeric_column(reinterpret_cast<double*>(column_ptr), num_distinct_values, begin, end);
268 break;
269 }
270 break;
271 case m::Numeric::N_Decimal:
272 M_unreachable("unsupported type");
273 }
274
275 } else {
276 M_unreachable("unsupported type");
277 }
278}
279
280void m::generate_correlated_column_data(void *left_ptr, void *right_ptr, const Attribute &attr,
281 std::size_t num_distinct_values_left, std::size_t num_distinct_values_right,
282 std::size_t count_left, std::size_t count_right,
283 std::size_t num_distinct_values_matching)
284{
285 if (streq(*attr.name, "id")) { // primary keys cannot be correlated
286 M_unreachable("primary keys unsupported");
287 } else if (auto n = cast<const Numeric>(attr.type)) {
288 switch (n->size()) {
289#define CASE(N) case N: \
290 generate_correlated_numeric_columns(reinterpret_cast<int##N##_t*>(left_ptr), \
291 reinterpret_cast<int##N##_t*>(right_ptr), \
292 num_distinct_values_left, num_distinct_values_right, \
293 count_left, count_right, \
294 num_distinct_values_matching); \
295 break
296 CASE(8);
297 CASE(16);
298 CASE(32);
299 CASE(64);
300#undef CASE
301 }
302 } else {
303 M_unreachable("unsupported type");
304 }
305}
#define M_unreachable(MSG)
Definition: macro.hpp:146
#define M_insist(...)
Definition: macro.hpp:129
auto max(PrimitiveExpr< U, L > other) -> PrimitiveExpr< common_type_t< T, U >, L > std
Computes the maximum of this and other.
Definition: WasmDSL.hpp:2504
‍mutable namespace
Definition: Backend.hpp:10
void set_all_not_null(uint8_t *column_ptr, std::size_t num_attrs, std::size_t begin, std::size_t end)
Sets all attributes of the begin-th row (including) to the end-th row (excluding) of column at addres...
void generate_column_data(void *column_ptr, const Attribute &attr, std::size_t num_distinct_values, std::size_t begin, std::size_t end)
Generates data for the column at address column_ptr from begin-th row (including) to end-th row (excl...
bool streq(const char *first, const char *second)
Definition: fn.hpp:29
T(x)
void set_all_null(uint8_t *column_ptr, std::size_t num_attrs, std::size_t begin, std::size_t end)
Sets all attributes of the begin-th row (including) to the end-th row (excluding) of column at addres...
void generate_correlated_column_data(void *left_ptr, void *right_ptr, const Attribute &attr, std::size_t num_distinct_values_left, std::size_t num_distinct_values_right, std::size_t count_left, std::size_t count_right, std::size_t num_distinct_values_matching)
Generates data for two columns at addresses left_ptr and right_ptr correlated by num_distinct_values_...
void generate_primary_keys(void *column_ptr, const Type &type, std::size_t begin, std::size_t end)
Generates primary keys of Type type for the begin-th row (including) to the end-th row (excluding) of...
#define CASE(N)
An attribute of a table.
Definition: Schema.hpp:289
const PrimitiveType * type
the type of the attribute
Definition: Schema.hpp:294
ThreadSafePooledString name
the name of the attribute
Definition: Schema.hpp:295
This class represents types in the SQL type system.
Definition: Type.hpp:46
bool is_integral() const
Definition: Type.hpp:500