![]() |
mutable
A Database System for Research and Fast Prototyping
|
Tree structure for Sum Product Networks. More...
#include <Spn.hpp>
Data Structures | |
struct | ContinuousLeaf |
struct | DiscreteLeaf |
struct | LearningData |
struct | Node |
struct | Product |
struct | Sum |
Public Types | |
enum | LeafType { AUTO , DISCRETE , CONTINUOUS } |
The different types of leaves for an attribute. More... | |
enum | SpnOperator { EQUAL , LESS , LESS_EQUAL , GREATER , GREATER_EQUAL , IS_NULL , EXPECTATION } |
enum | EvalType { APPROXIMATE , UPPER_BOUND , LOWER_BOUND } |
enum | UpdateType { INSERT , DELETE } |
using | Filter = std::unordered_map< unsigned, std::pair< SpnOperator, float > > |
Public Member Functions | |
std::size_t | num_rows () const |
Returns the number of rows in the SPN. | |
float | likelihood (const Filter &filter) const |
Compute the likelihood of the filter predicates, which are given as a map from each attribute to its respective operator and value. | |
float | upper_bound (const Filter &filter) const |
Compute the upper bound probability for continuous domains. | |
float | lower_bound (const Filter &filter) const |
Compute the lower bound probability for continuous domains. | |
float | expectation (unsigned attribute_id, const Filter &filter) const |
Compute the expectation of the given attribute. | |
void | update_row (Eigen::VectorXf &old_row, Eigen::VectorXf &updated_row) |
Update the SPN with the given row. | |
void | insert_row (Eigen::VectorXf &row) |
Insert the given row into the SPN. | |
void | delete_row (Eigen::VectorXf &row) |
Delete the given row from the SPN. | |
std::size_t | estimate_number_distinct_values (unsigned attribute_id) const |
Estimate the number of distinct values of the given attribute. | |
unsigned | height () const |
unsigned | breadth () const |
unsigned | degree () const |
std::size_t | memory_usage () const |
void | dump () const |
void | dump (std::ostream &out) const |
Static Public Member Functions | |
static Spn | learn_spn (Eigen::MatrixXf &data, Eigen::MatrixXi &null_matrix, std::vector< LeafType > &leaf_types) |
Learn an SPN over the given data. | |
Private Member Functions | |
Spn (std::size_t num_rows, std::unique_ptr< Node > root) | |
void | update (Eigen::VectorXf &row, UpdateType update_type) |
Update the SPN from the top down and adjust weights of sum nodes and the distributions on leaves. | |
Static Private Member Functions | |
static std::unique_ptr< Spn::Product > | create_product_min_slice (LearningData &learning_data) |
Create a product node by splitting all columns. | |
static std::unique_ptr< Product > | create_product_rdc (LearningData &learning_data, std::vector< SmallBitset > &column_candidates, std::vector< SmallBitset > &variable_candidates) |
Create a product node with the given candidates (vertical clustering). | |
static std::unique_ptr< Spn::Sum > | create_sum (LearningData &learning_data) |
Create a sum node by clustering the rows. | |
static std::unique_ptr< Node > | learn_node (LearningData &learning_data) |
Recursively learns the nodes of an SPN. | |
Private Attributes | |
std::size_t | num_rows_ |
std::unique_ptr< Node > | root_ |
using m::Spn::Filter = std::unordered_map<unsigned, std::pair<SpnOperator, float> > |
enum m::Spn::EvalType |
enum m::Spn::LeafType |
enum m::Spn::SpnOperator |
enum m::Spn::UpdateType |
|
inline private |
|
inline |
|
static private |
Create a product node by splitting all columns.
Definition at line 577 of file Spn.cpp.
References m::SmallBitset::begin(), m::Spn::LearningData::data, m::Spn::LearningData::leaf_types, learn_node(), m::Spn::LearningData::normalized, m::Spn::LearningData::null_matrix, and m::Spn::LearningData::variables.
Referenced by create_sum(), and learn_node().
|
static private |
Create a product node with the given candidates (vertical clustering).
Definition at line 602 of file Spn.cpp.
References m::Spn::LearningData::data, m::Spn::LearningData::leaf_types, learn_node(), m::Spn::LearningData::normalized, and m::Spn::LearningData::null_matrix.
Referenced by create_sum(), and learn_node().
|
static private |
Create a sum node by clustering the rows.
Definition at line 633 of file Spn.cpp.
References m::and, create_product_min_slice(), create_product_rdc(), create_sum(), m::Spn::LearningData::data, m::kmeans_with_centroids(), m::Spn::LearningData::leaf_types, m::Spn::LearningData::normalized, m::Spn::LearningData::null_matrix, num_rows(), and m::Spn::LearningData::variables.
Referenced by create_sum(), and learn_node().
|
inline |
void Spn::delete_row | ( | Eigen::VectorXf & | row | ) |
Delete the given row from the SPN.
Definition at line 940 of file Spn.cpp.
References DELETE, num_rows_, and update().
Referenced by m::SpnWrapper::delete_row(), and update_row().
void Spn::dump | ( | ) | const |
Definition at line 951 of file Spn.cpp.
References dump().
Referenced by m::SpnWrapper::dump(), and dump().
void Spn::dump | ( | std::ostream & | out | ) | const |
std::size_t Spn::estimate_number_distinct_values | ( | unsigned | attribute_id | ) | const |
Estimate the number of distinct values of the given attribute.
Definition at line 946 of file Spn.cpp.
References root_.
Referenced by m::SpnWrapper::estimate_number_distinct_values().
float Spn::expectation | ( | unsigned | attribute_id, |
const Filter & | filter | ||
) | const |
Compute the expectation of the given attribute.
Definition at line 913 of file Spn.cpp.
References APPROXIMATE, EXPECTATION, likelihood(), and root_.
Referenced by m::Spn::DiscreteLeaf::evaluate(), m::Spn::Sum::evaluate(), m::Spn::Product::evaluate(), m::Spn::ContinuousLeaf::evaluate(), and m::SpnWrapper::expectation().
|
inline |
void Spn::insert_row | ( | Eigen::VectorXf & | row | ) |
Insert the given row into the SPN.
Definition at line 934 of file Spn.cpp.
References INSERT, num_rows_, and update().
Referenced by m::SpnWrapper::insert_row(), and update_row().
|
static private |
Recursively learns the nodes of an SPN.
Definition at line 751 of file Spn.cpp.
References create_product_min_slice(), create_product_rdc(), create_sum(), m::Spn::LearningData::data, DISCRETE, m::Spn::LearningData::leaf_types, lower_bound(), m::Spn::LearningData::null_matrix, num_rows(), and m::Spn::LearningData::variables.
Referenced by create_product_min_slice(), create_product_rdc(), and learn_spn().
|
static |
Learn an SPN over the given data.
data | the data |
null_matrix | the NULL values of the data as a matrix |
attribute_to_id | a map from the attributes (random variables) to internal id |
leaf_types | the leaf type to use for each non-primary-key attribute |
Definition at line 851 of file Spn.cpp.
References m::SmallBitset::All(), learn_node(), and num_rows().
float Spn::likelihood | ( | const Filter & | filter | ) | const |
Compute the likelihood of the filter predicates, which are given as a map from each attribute to its respective operator and value.
The predicates in the map are seen as conjunctions.
Definition at line 898 of file Spn.cpp.
References APPROXIMATE, and root_.
Referenced by m::Spn::Sum::evaluate(), m::Spn::Product::evaluate(), expectation(), and m::SpnWrapper::likelihood().
float Spn::lower_bound | ( | const Filter & | filter | ) | const |
Compute the lower bound probability for continuous domains.
Definition at line 908 of file Spn.cpp.
References LOWER_BOUND, and root_.
Referenced by m::Spn::DiscreteLeaf::evaluate(), m::Spn::ContinuousLeaf::evaluate(), learn_node(), m::SpnWrapper::lower_bound(), m::Spn::ContinuousLeaf::print(), m::Spn::DiscreteLeaf::update(), and m::Spn::ContinuousLeaf::update().
|
inline |
Definition at line 393 of file Spn.hpp.
References root_.
Referenced by m::SpnWrapper::memory_usage().
|
inline |
Returns the number of rows in the SPN.
Definition at line 314 of file Spn.hpp.
References num_rows_.
Referenced by create_sum(), m::Spn::Sum::estimate_number_distinct_values(), m::Spn::ContinuousLeaf::estimate_number_distinct_values(), learn_node(), learn_spn(), m::SpnWrapper::num_rows(), m::Spn::Sum::update(), m::Spn::DiscreteLeaf::update(), and m::Spn::ContinuousLeaf::update().
|
private |
Update the SPN from the top down and adjust weights of sum nodes and the distributions on leaves.
row | the row to update in the SPN |
update_type | the type of update (insert or delete) |
Definition at line 892 of file Spn.cpp.
References root_.
Referenced by delete_row(), and insert_row().
void Spn::update_row | ( | Eigen::VectorXf & | old_row, |
Eigen::VectorXf & | updated_row | ||
) |
Update the SPN with the given row.
Definition at line 928 of file Spn.cpp.
References delete_row(), and insert_row().
Referenced by m::SpnWrapper::update_row().
float Spn::upper_bound | ( | const Filter & | filter | ) | const |
Compute the upper bound probability for continuous domains.
Definition at line 903 of file Spn.cpp.
References root_, and UPPER_BOUND.
Referenced by m::Spn::DiscreteLeaf::evaluate(), m::Spn::ContinuousLeaf::evaluate(), m::Spn::ContinuousLeaf::update(), and m::SpnWrapper::upper_bound().
|
private |
Definition at line 306 of file Spn.hpp.
Referenced by delete_row(), insert_row(), and num_rows().
|
private |
Definition at line 307 of file Spn.hpp.
Referenced by breadth(), degree(), dump(), estimate_number_distinct_values(), expectation(), height(), likelihood(), lower_bound(), memory_usage(), update(), and upper_bound().