mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
SpnWrapper.cpp
Go to the documentation of this file.
1#include "SpnWrapper.hpp"
2
3#include <mutable/mutable.hpp>
5
6
7using namespace m;
8using namespace Eigen;
9
10
12 const ThreadSafePooledString &name_of_table,
13 std::vector<Spn::LeafType> leaf_types)
14{
15 auto &C = Catalog::Get();
16 auto &db = C.get_database(name_of_database);
17 auto &table = db.get_table(name_of_table);
18
19 leaf_types.resize(table.num_attrs(), Spn::AUTO); // pad with AUTO
20
21 /* use CartesianProductEstimator to query data since there currently are no SPNs on the data. */
22 auto old_estimator = db.cardinality_estimator(C.create_cardinality_estimator(C.pool("CartesianProduct"), db.name));
23
24 std::size_t num_columns = table.num_attrs();
25 std::size_t num_rows = table.store().num_rows();
26
27 Diagnostic diag(false, std::cout, std::cerr);
28
29 auto primary_key = table.primary_key();
30 std::vector<std::size_t> primary_key_id;
31 for (auto &elem : primary_key) {
32 primary_key_id.push_back(elem.get().id);
33 }
34
35 MatrixXf data(num_rows, num_columns - primary_key_id.size());
36 MatrixXi null_matrix = MatrixXi::Zero(data.rows(), data.cols());
37 std::unordered_map<ThreadSafePooledString, unsigned> attribute_to_id;
38
39 std::size_t primary_key_count = 0;
40
41 const std::string table_name = *table.name();
42 auto stmt = statement_from_string(diag, "SELECT * FROM " + table_name + ";");
43 std::unique_ptr<ast::SelectStmt> select_stmt(dynamic_cast<ast::SelectStmt*>(stmt.release()));
44
45 /* fill the data matrix with the given table */
46 for (std::size_t current_column = 0; current_column < num_columns; current_column++) {
47 auto lower_bound = std::lower_bound(primary_key_id.begin(), primary_key_id.end(), current_column);
48 if (lower_bound != primary_key_id.end() && *lower_bound == current_column) {
49 primary_key_count++;
50 continue;
51 }
52
53 auto attribute = table.schema()[current_column].id.name;
54 attribute_to_id.emplace(attribute, current_column - primary_key_count);
55
56 auto &type = table.at(current_column).type;
57 std::size_t current_row = 0;
58
59 if (type->is_float()) {
60 if (leaf_types[current_column - primary_key_count] == Spn::AUTO) {
61 leaf_types[current_column - primary_key_count] = Spn::CONTINUOUS;
62 }
63 auto callback_data = std::make_unique<CallbackOperator>([&](const Schema &S, const Tuple &T) {
64 if (T.is_null(current_column)) {
65 null_matrix(current_row, current_column - primary_key_count) = 1;
66 data(current_row, current_column - primary_key_count) = 0;
67 } else {
68 data(current_row, current_column - primary_key_count) = T.get(current_column).as_f();
69 }
70 current_row++;
71 });
72 execute_query(diag, *select_stmt, std::move(callback_data));
73 }
74
75 if (type->is_double()) {
76 if (leaf_types[current_column - primary_key_count] == Spn::AUTO) {
77 leaf_types[current_column - primary_key_count] = Spn::CONTINUOUS;
78 }
79 auto callback_data = std::make_unique<CallbackOperator>([&](const Schema &S, const Tuple &T) {
80 if (T.is_null(current_column)) {
81 null_matrix(current_row, current_column - primary_key_count) = 1;
82 data(current_row, current_column - primary_key_count) = 0;
83 } else {
84 data(current_row, current_column - primary_key_count) = float(T.get(current_column).as_d());
85 }
86 current_row++;
87 });
88 execute_query(diag, *select_stmt, std::move(callback_data));
89 }
90
91 if (type->is_integral()) {
92 if (leaf_types[current_column - primary_key_count] == Spn::AUTO) {
93 leaf_types[current_column - primary_key_count] = Spn::DISCRETE;
94 }
95 auto callback_data = std::make_unique<CallbackOperator>([&](const Schema &S, const Tuple &T) {
96 if (T.is_null(current_column)) {
97 null_matrix(current_row, current_column - primary_key_count) = 1;
98 data(current_row, current_column - primary_key_count) = 0;
99 } else {
100 data(current_row, current_column - primary_key_count) = float(T.get(current_column).as_i());
101 }
102 current_row++;
103 });
104 execute_query(diag, *select_stmt, std::move(callback_data));
105 }
106
107 if (type->is_character_sequence()) {
108 if (leaf_types[current_column - primary_key_count] == Spn::AUTO) {
109 leaf_types[current_column - primary_key_count] = Spn::CONTINUOUS;
110 }
111 auto callback_data = std::make_unique<CallbackOperator>([&](const Schema &S, const Tuple &T) {
112 if (T.is_null(current_column)) {
113 null_matrix(current_row, current_column - primary_key_count) = 1;
114 data(current_row, current_column - primary_key_count) = 0;
115 } else {
116 auto v_pointer = T.get(current_column).as_p();
117 const char* value = static_cast<const char*>(v_pointer);
118 data(current_row, current_column - primary_key_count) = float(std::hash<const char*>{}(value));
119 //data(current_row, current_column-primary_key_count) = 0;
120 }
121 current_row++;
122 });
123 execute_query(diag, *select_stmt, std::move(callback_data));
124 }
125 }
126
127 db.cardinality_estimator(std::move(old_estimator));
128
129 return SpnWrapper(Spn::learn_spn(data, null_matrix, leaf_types), std::move(attribute_to_id));
130}
131
132std::unordered_map<ThreadSafePooledString, SpnWrapper*>
134 std::unordered_map<ThreadSafePooledString, std::vector<Spn::LeafType>> leaf_types)
135{
136 auto &C = Catalog::Get();
137 auto &db = C.get_database(name_of_database);
138
139 std::unordered_map<ThreadSafePooledString, SpnWrapper*> spns;
140
141 for (auto table_it = db.begin_tables(); table_it != db.end_tables(); table_it++) {
142 spns.emplace(
143 table_it->first,
144 new SpnWrapper(
145 learn_spn_table(name_of_database, table_it->first, std::move(leaf_types[table_it->first]))
146 )
147 );
148 }
149
150 return spns;
151}
‍mutable namespace
Definition: Backend.hpp:10
T(x)
ThreadSafeStringPool::proxy_type ThreadSafePooledString
Definition: Pool.hpp:464
std::unique_ptr< ast::Stmt > M_EXPORT statement_from_string(Diagnostic &diag, const std::string &str)
Use lexer, parser, and semantic analysis to create a Stmt from str.
Definition: mutable.cpp:25
void M_EXPORT execute_query(Diagnostic &diag, const ast::SelectStmt &stmt, std::unique_ptr< Consumer > consumer)
Optimizes and executes the given SelectStmt.
Definition: mutable.cpp:368
static Catalog & Get()
Return a reference to the single Catalog instance.
A Schema represents a sequence of identifiers, optionally with a prefix, and their associated types.
Definition: Schema.hpp:39
A wrapper class for an Spn to be used in the context of databases.
Definition: SpnWrapper.hpp:13
std::size_t num_rows() const
returns the number of rows in the SPN.
Definition: SpnWrapper.hpp:72
static std::unordered_map< ThreadSafePooledString, SpnWrapper * > learn_spn_database(const ThreadSafePooledString &name_of_database, std::unordered_map< ThreadSafePooledString, std::vector< Spn::LeafType > > leaf_types=decltype(leaf_types)())
Learn SPNs over the tables in the given database.
Definition: SpnWrapper.cpp:133
float lower_bound(const AttrFilter &attr_filter) const
Compute the lower bound probability for continuous domains.
Definition: SpnWrapper.hpp:87
static SpnWrapper learn_spn_table(const ThreadSafePooledString &name_of_database, const ThreadSafePooledString &name_of_table, std::vector< Spn::LeafType > leaf_types=decltype(leaf_types)())
Learn an SPN over the given table.
Definition: SpnWrapper.cpp:11
static Spn learn_spn(Eigen::MatrixXf &data, Eigen::MatrixXi &null_matrix, std::vector< LeafType > &leaf_types)
Learn an SPN over the given data.
Definition: Spn.cpp:851
@ CONTINUOUS
Definition: Spn.hpp:26
@ AUTO
Definition: Spn.hpp:24
@ DISCRETE
Definition: Spn.hpp:25
A SQL select statement.
Definition: AST.hpp:936