20typedef Matrix<double, Dynamic, Dynamic, RowMajor>
RowMatrixXd;
21std::pair<RowMatrixXd, VectorXd>
load_csv (
const char *csv_path)
23 std::ifstream csv_file(csv_path);
25 std::cerr <<
"Filepath \"" << csv_path <<
"\" is invalid.";
29 std::vector<double> feature_values;
30 std::vector<double> target_values;
33 std::getline(csv_file, line);
34 std::stringstream ls(line);
37 while (std::getline(ls, ss,
',')) {
38 if (ss.find(
"time") != std::string::npos) {
46 while (std::getline(csv_file, line)) {
48 feature_values.push_back(1.0);
50 std::stringstream line_stream(line);
53 while (std::getline(line_stream, cell,
',')) {
56 target_values.push_back(std::stod(cell));
58 feature_values.push_back(std::stod(cell));
64 Map<RowMatrixXd> feature_matrix(feature_values.data(), rows, feature_values.size()/rows);
65 Map<VectorXd> target_vector(target_values.data(), rows, 1);
66 return std::pair<RowMatrixXd, VectorXd>(feature_matrix, target_vector);
75 std::ifstream csv_file(csv_path);
77 std::cerr <<
"Filepath \"" << csv_path <<
"\" is invalid.";
82 std::vector<double> coefficients;
85 while (std::getline(csv_file, line)) {
86 coefficients.push_back(std::stod(line));
89 M_insist(std::floor(rows / 2) == degree);
90 Map<VectorXd> coefficients_vector(coefficients.data(),rows, 1);
92 return CostModel(coefficients_vector, 3, [degree](Eigen::MatrixXd featureMatrix) {
94 featureMatrix.conservativeResize(featureMatrix.rows(), 2 * degree + 1);
95 for (
unsigned row = 0; row < featureMatrix.rows(); ++row) {
96 for (
unsigned i = 2; i <= degree; ++i) {
97 featureMatrix(row, 2 * i - 1) = featureMatrix(row, 1) * std::pow(featureMatrix(row, 2), i - 1);
98 featureMatrix(row, 2 * i) = std::pow(featureMatrix(row, 2), i);
101 return featureMatrix;
111 std::ifstream csv_file(csv_path);
113 std::cerr <<
"Filepath \"" << csv_path <<
"\" is invalid.";
118 std::vector<double> coefficients;
121 while (std::getline(csv_file, line)) {
122 coefficients.push_back(std::stod(line));
125 Map<VectorXd> coefficients_vector(coefficients.data(),rows, 1);
135void usage(std::ostream &out,
const char *name)
137 out <<
"A command line tool to generate physical operator cost models.\n"
138 <<
"USAGE:\n\t" << name <<
" <CSV-FOLDER>"
142int main(
int argc,
const char **argv)
148 const char* gen_filter_model;
149 const char* gen_group_by_model;
150 const char* gen_join_model;
152 const char* load_filter_model;
153 const char* load_group_by_model;
154 const char* load_join_model;
156 const char* eval_filter_model;
157 const char* eval_group_by_model;
158 const char* eval_join_model;
165 double num_distinct_values;
168 double num_rows_left;
169 double num_rows_right;
170 double redundancy_left;
171 double redundancy_right;
181#define ADD(TYPE, VAR, INIT, SHORT, LONG, DESCR, CALLBACK)\
184 AP.add<TYPE>(SHORT, LONG, DESCR, CALLBACK);\
186 ADD(
bool,
args.show_help,
false,
188 "prints this help message",
189 [&](
bool) { args.show_help = true; });
190 ADD(
const char*,
args.gen_filter_model,
nullptr,
192 "generate a filter cost model and saves it in the given folder",
193 [&](
const char* str) { args.gen_filter_model = str; });
194 ADD(
const char*,
args.gen_group_by_model,
nullptr,
196 "generate a group by cost model and saves it in the given folder",
197 [&](
const char* str) { args.gen_group_by_model = str; });
198 ADD(
const char*,
args.gen_join_model,
nullptr,
200 "generate a join cost modeland saves it in the given folder",
201 [&](
const char* str) { args.gen_join_model = str; });
202 ADD(
const char *,
args.load_filter_model,
nullptr,
203 nullptr,
"--load_filter",
204 "load a filter model from csv file",
205 [&](
const char *str) { args.load_filter_model = str; });
206 ADD(
const char *,
args.load_group_by_model,
nullptr,
207 nullptr,
"--load_group_by",
208 "load a group by model from csv file",
209 [&](
const char *str) { args.load_group_by_model = str; });
210 ADD(
const char *,
args.load_join_model,
nullptr,
211 nullptr,
"--load_join",
212 "load a join model from csv file",
213 [&](
const char *str) { args.load_join_model = str; });
214 ADD(
const char *,
args.eval_filter_model,
nullptr,
215 nullptr,
"--eval_filter",
216 "load & evaluate a filter model from csv file",
217 [&](
const char *str) { args.eval_filter_model = str; });
218 ADD(
const char *,
args.eval_group_by_model,
nullptr,
219 nullptr,
"--eval_group_by",
220 "load & evaluate a group by model from csv file",
221 [&](
const char *str) { args.eval_group_by_model = str; });
222 ADD(
const char *,
args.eval_join_model,
nullptr,
223 nullptr,
"--eval_join",
224 "load & evaluate a join model from csv file",
225 [&](
const char *str) { args.eval_join_model = str; });
228 "set the polynomial degree used in the filter cost model (default = 9)",
229 [&](
int nr) { args.degree = nr; });
231 nullptr,
"--num_rows",
232 "set the number of rows used in the cost model prediction",
233 [&](
int nr) { args.num_rows = double(nr); });
234 ADD(
int,
args.num_distinct_values, 0,
235 nullptr,
"--num_distinct_values",
236 "set the number of distinct values used in the cost model prediction",
237 [&](
int ndv) { args.num_distinct_values = double(ndv); });
239 nullptr,
"--selectivity",
240 "set the selectivity used in the cost model prediction (in %)",
241 [&](
int sel) { args.selectivity = double(sel) / 100.0; });
242 ADD(
int,
args.num_rows_left, 0,
243 nullptr,
"--num_rows_left",
244 "set the number of rows used in the cost model prediction (join only)",
245 [&](
int nr) { args.num_rows_left = double(nr); });
246 ADD(
int,
args.num_rows_right, 0,
247 nullptr,
"--num_rows_right",
248 "set the number of rows used in the cost model prediction (join only)",
249 [&](
int nr) { args.num_rows_right = double(nr); });
250 ADD(
int,
args.redundancy_left, 1,
251 nullptr,
"--redundancy_left",
252 "set the redundancy of a value in the cost model prediction (join only)",
253 [&](
int red) { args.redundancy_left = double(red); });
254 ADD(
int,
args.redundancy_right, 1,
255 nullptr,
"--redundancy_right",
256 "set the redundancy of a value in the cost model prediction (join only)",
257 [&](
int red) { args.redundancy_right = double(red); });
259 nullptr,
"--result_size",
260 "set the size of the result in the cost model prediction (join only)",
261 [&](
int res) { args.result_size = double(res); });
263 ADD(
const char *,
args.backend,
265 nullptr,
"--backend",
266 "specify the execution backend",
268 [&](
const char *str) {
270 C.default_backend(C.pool(str));
272 } catch (std::invalid_argument) {
273 std::cerr <<
"There is no execution backend with the name \"" << str <<
"\".\n" << AP;
274 std::exit(EXIT_FAILURE);
279 AP.parse_args(argc, argv);
281 if (
args.show_help) {
282 usage(std::cout, argv[0]);
283 std::cout <<
"WHERE\n" << AP;
284 std::exit(EXIT_SUCCESS);
287 if (AP.args().size() != 0) {
288 std::cerr <<
"ERROR: Too many arguments.\n";
289 usage(std::cerr, argv[0]);
290 std::exit(EXIT_FAILURE);
293 if (
args.gen_filter_model) {
294 std::cout <<
"Measurement data will be written to '" <<
args.gen_filter_model <<
"'.\n";
295 auto costmodel = CostModelFactory::get_cost_model<int32_t>(OperatorKind::FilterOperator,
296 args.gen_filter_model,
299 Eigen::RowVectorXd feature_matrix(2);
300 feature_matrix <<
args.num_rows,
args.selectivity;
301 std::cout << costmodel.predict_target(feature_matrix) << std::endl;
305 if (
args.gen_group_by_model) {
306 std::cout <<
"Measurement data will be written to '" <<
args.gen_group_by_model <<
"'.\n";
307 auto costmodel = CostModelFactory::get_cost_model<int32_t>(OperatorKind::GroupingOperator,
308 args.gen_group_by_model);
310 Eigen::RowVectorXd feature_matrix(2);
311 feature_matrix <<
args.num_rows,
args.num_distinct_values;
312 std::cout << costmodel.predict_target(feature_matrix) << std::endl;
316 if (
args.gen_join_model) {
317 std::cout <<
"Measurement data will be written to '" <<
args.gen_join_model <<
"'.\n";
318 auto costmodel = CostModelFactory::get_cost_model<int32_t>(OperatorKind::JoinOperator,
319 args.gen_join_model);
321 Eigen::RowVectorXd feature_matrix(5);
322 feature_matrix <<
args.num_rows_left,
args.num_rows_right,
args.redundancy_left,
args.redundancy_right,
324 std::cout << costmodel.predict_target(feature_matrix) << std::endl;
328 if (
args.load_filter_model) {
329 auto costmodel = load_filter_cost_model<int32_t>(
args.load_filter_model);
331 Eigen::RowVectorXd feature_matrix(2);
332 feature_matrix <<
args.num_rows,
args.selectivity;
333 std::cout << costmodel.predict_target(feature_matrix) << std::endl;
337 if (
args.load_group_by_model) {
338 auto costmodel = load_cost_model<int32_t>(
args.load_group_by_model);
340 Eigen::RowVectorXd feature_matrix(2);
341 feature_matrix <<
args.num_rows,
args.num_distinct_values;
342 std::cout << costmodel.predict_target(feature_matrix) << std::endl;
346 if (
args.load_join_model) {
347 auto costmodel = load_cost_model<int32_t>(
args.load_join_model);
349 Eigen::RowVectorXd feature_matrix(5);
350 feature_matrix <<
args.num_rows_left,
args.num_rows_right,
args.redundancy_left,
args.redundancy_right,
352 std::cout << costmodel.predict_target(feature_matrix) << std::endl;
356 if (
args.eval_filter_model) {
360 if (
args.eval_group_by_model) {
364 if (
args.eval_join_model) {
bool show_help
whether to show a help message
A parser for command line arguments.
A model for predicting the costs of a physical operator.
The catalog contains all Databases and keeps track of all meta information of the database system.
static Catalog & Get()
Return a reference to the single Catalog instance.
CostModel load_cost_model(const char *csv_path)
Load a cost model for an operator without transformations from a file.
std::pair< RowMatrixXd, VectorXd > load_csv(const char *csv_path)
Matrix< double, Dynamic, Dynamic, RowMajor > RowMatrixXd
Parses csv file and returns a pair of matrices.
void usage(std::ostream &out, const char *name)
#define ADD(TYPE, VAR, INIT, SHORT, LONG, DESCR, CALLBACK)
CostModel load_filter_cost_model(const char *csv_path, unsigned degree=9)
Load a cost model for the filter operator from a file.