13#include <unordered_set>
// Builds a vector of `count` values of arithmetic type T using a uniform distribution bounded below by `min`,
// recording drawn values in a set so that repeats can be detected.
// NOTE(review): this listing omits several source lines (the template header, braces, the line that defines
// `val`, the branch on `success`, and the return statement); the comments below cover only what is visible.
27std::vector<T> generate_distinct_numbers(
const T min,
const T max,
const std::size_t count)
// Integral T uses std::uniform_int_distribution, every other T uses std::uniform_real_distribution.
29 using uniform_distibution_t = std::conditional_t<std::is_integral_v<T>,
30 std::uniform_int_distribution<T>,
31 std::uniform_real_distribution<T>>;
32 static_assert(std::is_arithmetic_v<T>,
"T must be an arithmetic type");
// For integral T only: require that `max - count` still lies above `min`.
// NOTE(review): `max` is a T and `count` is a std::size_t, so the subtraction happens in a mixed type before
// the cast back to T -- confirm this behaves as intended for signed or narrow T.
33 if (std::is_integral_v<T>)
34 M_insist(
T(max - count) > min,
"range is too small to provide enough distinct values");
// Output vector, pre-sized for `count` elements.
36 std::vector<T> values;
37 values.reserve(count);
// Values that have already been drawn.
39 std::unordered_set<T> taken;
// `counter` starts at max - count; it is the upper bound handed to the distribution and is pre-incremented
// at the last visible push_back below.
40 T counter =
max -
T(count);
43 uniform_distibution_t dist(min, counter);
// One iteration per requested value.
45 for (std::size_t i = 0; i != count; ++i) {
// `success` is true only if `val` was not yet contained in `taken`.
47 auto[_, success] = taken.insert(val);
49 values.push_back(val);
// NOTE(review): presumably the fallback taken when `val` was a repeat (a value above the distribution's
// upper bound is appended instead) -- the enclosing if/else is not visible in this listing; confirm.
51 values.push_back(++counter);
// Generates `num_distinct_values` values of arithmetic type T and, judging by the final check's message,
// writes elements into the column starting at `column_ptr + begin`.
// NOTE(review): this listing omits several source lines (template header, braces, the declaration of the
// random generator `g`, and the inner loop body that performs the writes).
61void generate_numeric_column(T *column_ptr, std::size_t num_distinct_values, std::size_t begin, std::size_t end)
63 static_assert(std::is_arithmetic_v<T>,
"T must be an arithmetic type");
64 M_insist(begin < end,
"must set at least one row");
// Number of rows in the half-open range [begin, end).
66 const auto count = end - begin;
69 std::vector<T> values;
// Integral T: the lower bound is lowest() for unsigned T and lowest() + 1 for signed T
// (std::is_signed_v<T> converts to 0 or 1); the upper bound is max().
70 if (std::is_integral_v<T>)
71 values = datagen::generate_uniform_distinct_numbers<T>(
72 std::numeric_limits<T>::lowest() + std::is_signed_v<T>,
73 std::numeric_limits<T>::max(),
// Non-integral T: the bounds passed are T(0) and T(1).
77 values = datagen::generate_uniform_distinct_numbers<T>(
T(0),
T(1), num_distinct_values);
// The generator must have produced exactly the requested number of values.
78 M_insist(values.size() == num_distinct_values);
// Write cursor, positioned at the `begin`-th element of the column.
81 auto ptr = column_ptr + begin;
// The loop header has no increment expression, so `i` must be advanced inside the body (not visible here).
83 for (std::size_t i = 0; i != count; ) {
// Re-shuffle the values on every pass of the outer loop.
85 std::shuffle(values.begin(), values.end(), g);
86 for (
auto v : values) {
// NOTE(review): `ptr` starts at `column_ptr + begin`; if it advances once per written element, this
// difference equals begin + count (i.e. `end`), not `count`, so the check would only hold for begin == 0.
// Confirm against the omitted loop body.
94 M_insist(ptr - column_ptr ==
long(count),
"incorrect number of elements written");
// Produces the value sets for two columns that share `num_distinct_values_matching` distinct values:
// one pool of values is generated, the left column draws from the front of the pool and the right column
// from the back.
// NOTE(review): this listing omits several source lines (template header, braces, the closing of the
// generate_distinct_numbers call, and the inner loop bodies that perform the writes).
98void generate_correlated_numeric_columns(T *left_ptr, T *right_ptr, std::size_t num_distinct_values_left,
99 std::size_t num_distinct_values_right, std::size_t count_left,
100 std::size_t count_right, std::size_t num_distinct_values_matching)
102 static_assert(std::is_arithmetic_v<T>,
"T must be an arithmetic type");
// Neither side may have fewer distinct values than the number of matching values.
// NOTE(review): the comparison is `>=` although the messages say "larger than".
103 M_insist(num_distinct_values_left >= num_distinct_values_matching,
104 "num_distinct_values_left must be larger than num_distinct_values_matching");
105 M_insist(num_distinct_values_right >= num_distinct_values_matching,
106 "num_distinct_values_right must be larger than num_distinct_values_matching");
// Pool size is left + right - matching, so that taking `left` values from the front and `right` values from
// the back overlaps in exactly `matching` values.
110 const std::vector<T> values = generate_distinct_numbers<T>(
111 std::numeric_limits<T>::lowest(),
112 std::numeric_limits<T>::max(),
113 num_distinct_values_left + num_distinct_values_right - num_distinct_values_matching
// Generator constructed with the constant seed 0.
116 std::mt19937_64 g(0);
// Left value set: the first `num_distinct_values_left` entries of the pool.
121 std::vector<T> values_left(values.begin(), values.begin() + num_distinct_values_left);
124 auto left = left_ptr;
// The loop header has no increment expression, so `i` must be advanced inside the body (not visible here).
125 for (std::size_t i = 0; i != count_left; ) {
127 std::shuffle(values_left.begin(), values_left.end(), g);
128 for (
auto v : values_left) {
// `left` starts at `left_ptr`, so the difference is the number of positions the cursor advanced.
136 M_insist(left - left_ptr ==
long(count_left),
137 "incorrect number of elements written to left store");
// Right value set: the last `num_distinct_values_right` entries of the pool, taken in reverse order.
143 std::vector<T> values_right(values.rbegin(), values.rbegin() + num_distinct_values_right);
146 auto right = right_ptr;
147 for (std::size_t i = 0; i != count_right; ) {
149 std::shuffle(values_right.begin(), values_right.end(), g);
150 for (
auto v : values_right) {
// Reached the requested row count; the statement controlled by this condition is not visible here.
153 if (i == count_right)
158 M_insist(right - right_ptr ==
long(count_right),
159 "incorrect number of elements written to right store");
// Sets bits to 1 in the bitmap at `column_ptr` for the bit offsets derived from `num_attrs * begin` and
// `num_attrs * end`: partial bytes at both edges are OR-ed with a mask, whole bytes are filled by memset.
// NOTE(review): this listing omits several source lines (braces and any conditionals guarding the two
// partial-byte statements).
165void m::set_all_null(uint8_t *column_ptr, std::size_t num_attrs, std::size_t begin, std::size_t end)
167 M_insist(begin < end,
"must set at least one row");
// Split both bit offsets into a byte index and a bit position within that byte.
// `begin_bytes` is the only non-const one -- presumably it is advanced on a line not shown here.
169 auto begin_bytes = (num_attrs * begin) / 8U;
170 const auto begin_bits = (num_attrs * begin) % 8U;
171 const auto end_bytes = (num_attrs * end) / 8U;
172 const auto end_bits = (num_attrs * end) % 8U;
174 M_insist(begin_bytes != end_bytes,
"the `begin`-th row and `end`-th row must be in different bytes");
// The mask has its low (8 - begin_bits) bits cleared, so within the byte this sets the `begin_bits` most
// significant bits.
// NOTE(review): the end-byte statement below sets the `end_bits` *least* significant bits, i.e. it treats
// bit 0 as the first bit of a byte. Under that ordering the bits from position `begin_bits` upward would be
// ~((1U << begin_bits) - 1U). Confirm which bit order the bitmap uses.
178 *(column_ptr + begin_bytes) |= ~((1U << (8U - begin_bits)) - 1U);
// Sets the `end_bits` least significant bits of the byte at `end_bytes`.
184 *(column_ptr + end_bytes) |= (1U << end_bits) - 1U;
// Fill `num_bytes` bytes starting at `begin_bytes` with all-ones.
186 const auto num_bytes = end_bytes - begin_bytes;
187 std::memset(column_ptr + begin_bytes, uint8_t(~0), num_bytes);
// Counterpart of the bit-setting routine: clears bits in the bitmap at `column_ptr` for the bit offsets
// derived from `num_attrs * begin` and `num_attrs * end`.
// NOTE(review): the signature line is not visible in this listing (presumably `set_all_not_null` -- confirm),
// nor are braces or any conditionals guarding the two partial-byte statements.
192 M_insist(begin < end,
"must set at least one row");
// Split both bit offsets into a byte index and a bit position within that byte.
// `begin_bytes` is the only non-const one -- presumably it is advanced on a line not shown here.
194 auto begin_bytes = (num_attrs * begin) / 8U;
195 const auto begin_bits = (num_attrs * begin) % 8U;
196 const auto end_bytes = (num_attrs * end) / 8U;
197 const auto end_bits = (num_attrs * end) % 8U;
199 M_insist(begin_bytes != end_bytes,
"the `begin`-th row and `end`-th row must be in different bytes");
// Keeps the `begin_bits` least significant bits of the byte at `begin_bytes` and clears the rest.
203 *(column_ptr + begin_bytes) &= (1U << begin_bits) - 1U;
// Clears the low (8 - end_bits) bits of the byte at `end_bytes` and keeps the `end_bits` most significant ones.
// NOTE(review): the begin-byte statement above treats bit 0 as the first bit of a byte. Under that ordering
// the bits to clear here would be the low `end_bits` bits, i.e. the mask ~((1U << end_bits) - 1U).
// Confirm which bit order the bitmap uses.
209 *(column_ptr + end_bytes) &= ~((1U << (8U - end_bits)) - 1U);
// Zero `num_bytes` bytes starting at `begin_bytes`.
211 const auto num_bytes = end_bytes - begin_bytes;
212 std::memset(column_ptr + begin_bytes, uint8_t(0), num_bytes);
// Fills rows [begin, end) of an integer column with the consecutive values begin, begin + 1, ..., end - 1.
// NOTE(review): the signature line is not visible in this listing (presumably `generate_primary_keys` --
// confirm), nor is the control flow that selects between the 32-bit and 64-bit variant.
218 M_insist(begin < end,
"must set at least one row");
// View `type` as a Numeric.
221 auto &n = as<const Numeric>(type);
// 32-bit variant: the column is addressed as int32_t and `begin` is converted to int32_t as the start value.
227 auto ptr =
reinterpret_cast<int32_t*
>(column_ptr);
228 std::iota<int32_t*, int32_t>(ptr + begin, ptr + end, begin);
// 64-bit variant: the column is addressed as int64_t and `begin` is converted to int64_t as the start value.
233 auto ptr =
reinterpret_cast<int64_t*
>(column_ptr);
234 std::iota<int64_t*, int64_t>(ptr + begin, ptr + end, begin);
// Dispatches column generation on the attribute's type; the visible part handles Numeric attributes by
// forwarding to ::generate_numeric_column with the column pointer cast to the matching C++ type.
// NOTE(review): only the tail of the signature is visible in this listing (presumably `generate_column_data`
// -- confirm); the preceding branch, the switch headers, the remaining lines of the CASE macro and the
// handling of N_Decimal are omitted.
241 std::size_t begin, std::size_t end)
243 M_insist(begin < end,
"must set at least one row");
247 }
else if (
auto n = cast<const Numeric>(attr.
type)) {
// Numeric attribute: the case labels below select on the numeric kind.
249 case m::Numeric::N_Int:
// CASE(N) expands to a `case N:` label that generates the column viewed as intN_t.
251#define CASE(N) case N: \
252 ::generate_numeric_column(reinterpret_cast<int##N##_t*>(column_ptr), num_distinct_values, begin, end); \
261 case m::Numeric::N_Float:
// Floating-point columns are generated as float or as double; the selection between the two is not visible.
264 ::generate_numeric_column(
reinterpret_cast<float*
>(column_ptr), num_distinct_values, begin, end);
267 ::generate_numeric_column(
reinterpret_cast<double*
>(column_ptr), num_distinct_values, begin, end);
271 case m::Numeric::N_Decimal:
// Dispatches correlated column generation on the attribute's type; the visible part handles Numeric
// attributes by forwarding all counts to generate_correlated_numeric_columns with both column pointers
// cast to the matching integer type.
// NOTE(review): only the tail of the signature is visible in this listing (presumably
// `generate_correlated_column_data` -- confirm); the preceding branch, the switch header and the
// remaining lines of the CASE macro are omitted.
281 std::size_t num_distinct_values_left, std::size_t num_distinct_values_right,
282 std::size_t count_left, std::size_t count_right,
283 std::size_t num_distinct_values_matching)
287 }
else if (
auto n = cast<const Numeric>(attr.
type)) {
// CASE(N) expands to a `case N:` label that generates both columns viewed as intN_t.
289#define CASE(N) case N: \
290 generate_correlated_numeric_columns(reinterpret_cast<int##N##_t*>(left_ptr), \
291 reinterpret_cast<int##N##_t*>(right_ptr), \
292 num_distinct_values_left, num_distinct_values_right, \
293 count_left, count_right, \
294 num_distinct_values_matching); \
#define M_unreachable(MSG)
auto max(PrimitiveExpr< U, L > other) -> PrimitiveExpr< common_type_t< T, U >, L > std
Computes the maximum of this and other.
void set_all_not_null(uint8_t *column_ptr, std::size_t num_attrs, std::size_t begin, std::size_t end)
Sets all attributes of the begin-th row (including) to the end-th row (excluding) of column at addres...
void generate_column_data(void *column_ptr, const Attribute &attr, std::size_t num_distinct_values, std::size_t begin, std::size_t end)
Generates data for the column at address column_ptr from begin-th row (including) to end-th row (excl...
bool streq(const char *first, const char *second)
void set_all_null(uint8_t *column_ptr, std::size_t num_attrs, std::size_t begin, std::size_t end)
Sets all attributes of the begin-th row (including) to the end-th row (excluding) of column at addres...
void generate_correlated_column_data(void *left_ptr, void *right_ptr, const Attribute &attr, std::size_t num_distinct_values_left, std::size_t num_distinct_values_right, std::size_t count_left, std::size_t count_right, std::size_t num_distinct_values_matching)
Generates data for two columns at addresses left_ptr and right_ptr correlated by num_distinct_values_...
void generate_primary_keys(void *column_ptr, const Type &type, std::size_t begin, std::size_t end)
Generates primary keys of Type type for the begin-th row (including) to the end-th row (excluding) of...
const PrimitiveType * type
the type of the attribute
ThreadSafePooledString name
the name of the attribute
This class represents types in the SQL type system.