// Ratio of linear models to index entries for recursive model indexes.
// Defaults to 0.01; overwritten by the `--rmi-model-entry-ratio` command-line option.
19double rmi_model_entry_ratio = 0.01;
24static
void add_index_args()
32 "--rmi-model-entry-ratio",
33 "specify the ratio of linear models to index entries for recursive model indexes",
34 [](
double rmi_model_entry_ratio){ options::rmi_model_entry_ratio = rmi_model_entry_ratio; }
42 std::ostringstream oss;
44 for (std::size_t i = 0; i != schema.
num_entries(); ++i) {
45 if (i != 0) oss <<
", ";
46 oss << schema.
at(i).
id;
48 oss <<
" FROM " << table.
name() <<
';';
61 auto entry = key_schema.
at(0);
64 auto attribute_type = entry.
type;
66 if constexpr(not std::same_as<key_type, TYPE>) \
67 throw invalid_argument("Key type and attribute type do not match."); \
75 case Numeric::N_Decimal:
78 case 8:
CHECK(int8_t);
79 case 16:
CHECK(int16_t);
80 case 32:
CHECK(int32_t);
81 case 64:
CHECK(int64_t);
83 case Numeric::N_Float:
86 case 32:
CHECK(
float);
87 case 64:
CHECK(
double);
99 auto query = build_query(table, key_schema);
110 fn_get = [](
const Tuple &t) {
return static_cast<key_type>(t.
get(0).
as<int64_t>()); };
115 std::size_t tuple_id = 0;
116 auto fn_add = [&](
const Schema&,
const Tuple &tuple) {
117 if (not tuple.is_null(0))
118 this->add(fn_get(tuple), tuple_id);
121 auto consumer = std::make_unique<CallbackOperator>(fn_add);
127 static thread_local std::unique_ptr<Backend> backend;
138 std::exchange(
Catalog::Get().timer(), std::move(old_timer));
141template<
typename Key>
144 if constexpr(std::same_as<key_type, const char*>) {
146 data_.emplace_back(C.
pool(key), value);
148 data_.emplace_back(key, value);
153template<arithmetic Key>
157 std::sort(base_type::data_.begin(), base_type::data_.end(), base_type::cmp);
160 auto begin = base_type::begin();
161 auto end = base_type::end();
162 std::size_t n_keys = std::distance(begin, end);
163 std::size_t n_models = std::max<std::size_t>(1, n_keys * options::rmi_model_entry_ratio);
164 models_.reserve(n_models + 1);
167 models_.emplace_back(
168 LinearModel::train_linear_spline(
172 static_cast<double>(n_models) / n_keys
177 auto get_segment_id = [&](
entry_type e) {
return std::clamp<double>(models_[0](e.first), 0, n_models - 1); };
178 std::size_t segment_start = 0;
179 std::size_t segment_id = 0;
180 for (std::size_t i = 0; i != n_keys; ++i) {
181 auto pos = begin + i;
182 std::size_t pred_segment_id = get_segment_id(*pos);
183 if (pred_segment_id > segment_id) {
184 models_.emplace_back(
185 LinearModel::train_linear_regression(
186 begin + segment_start,
191 for (std::size_t j = segment_id + 1; j < pred_segment_id; ++j) {
192 models_.emplace_back(
193 LinearModel::train_linear_regression(
200 segment_id = pred_segment_id;
205 models_.emplace_back(
206 LinearModel::train_linear_regression(
207 begin + segment_start,
212 for (std::size_t j = segment_id + 1; j < n_models; ++j) {
213 models_.emplace_back(
214 LinearModel::train_linear_regression(
223 base_type::finalized_ =
true;
227#define INSTANTIATE(CLASS) \
228 template struct CLASS;
#define M_INDEX_LIST_TEMPLATED(X)
__attribute__((constructor(202))) static void register_interpreter()
#define INSTANTIATE(CLASS)
void add(const char *group_name, const char *short_name, const char *long_name, const char *description, Callback &&callback)
Adds a new group option to the ArgParser.
#define M_unreachable(MSG)
std::unique_ptr< ast::Stmt > M_EXPORT statement_from_string(Diagnostic &diag, const std::string &str)
Use lexer, parser, and semantic analysis to create a Stmt from str.
void M_EXPORT execute_query(Diagnostic &diag, const ast::SelectStmt &stmt, std::unique_ptr< Consumer > consumer)
Optimizes and executes the given SelectStmt.
auto visit(Callable &&callable, Base &obj, m::tag< Callable > &&=m::tag< Callable >())
Generic implementation to visit a class hierarchy, with similar syntax as std::visit.
command-line options for the HeuristicSearchPlanEnumerator
The catalog contains all Databases and keeps track of all meta information of the database system.
ThreadSafePooledString pool(const char *str) const
Creates an internalized copy of the string str by adding it to the internal StringPool.
static Catalog & Get()
Return a reference to the single Catalog instance.
std::unique_ptr< Backend > create_backend() const
Returns a new Backend.
m::ArgParser & arg_parser()
The type of character strings, both fixed length and varying length.
The numeric type represents integer and floating-point types of different precision and scale.
static Options & Get()
Return a reference to the single Options instance.
A Schema represents a sequence of identifiers, optionally with a prefix, and their associated types.
std::size_t num_entries() const
Returns the number of entries in this Schema.
entry_type & at(std::size_t idx)
Returns the entry at index idx with in-bounds checking.
A table is a sorted set of attributes.
virtual const ThreadSafePooledString & name() const =0
Returns the name of the Table.
Collect timings of events.
Value & get(std::size_t idx)
Returns a reference to the Value at index idx.
std::conditional_t< std::is_pointer_v< T >, T, T & > as()
Returns a reference to the value interpreted as of type T.
void bulkload(const Table &table, const Schema &key_schema) override
Bulkloads the index from table on the key contained in key_schema by executing a query and adding one entry for each result tuple whose key is not NULL.
void add(const key_type key, const value_type value)
Adds a single pair of key and value to the index.
static std::string build_query(const Table &table, const Schema &schema)
Constructs a query string to select all attributes in schema from table.
void finalize() override
Sorts the underlying vector, builds the linear models, and flags the index as finalized.
base_type::entry_type entry_type
Signals that an argument to a function or method was invalid.