10#include <unordered_map>
47 using Filter = std::unordered_map<unsigned, std::pair<SpnOperator, float>>;
53 const Eigen::MatrixXf &
data;
60 const Eigen::MatrixXf &
data,
82 void dump(std::ostream &out)
const;
102 virtual void print(std::ostream &out, std::size_t num_tabs)
const = 0;
120 std::vector<std::unique_ptr<ChildWithWeight>>
children;
127 std::pair<float, float>
evaluate(
const Filter &filter,
unsigned leaf_id,
EvalType eval_type)
const override;
134 unsigned max_height = 0;
135 for (
auto &child :
children) { max_height = std::max(max_height, child->child->height()); }
136 return 1 + max_height;
144 unsigned max_degree =
children.size();
145 for (
auto &child :
children) { max_degree = std::max(max_degree, child->child->degree()); }
149 std::size_t memory_used = 0;
150 memory_used +=
sizeof *
this +
children.size() *
sizeof(
decltype(
children)::value_type);
152 memory_used += child->child->memory_usage();
157 void print(std::ostream &out, std::size_t num_tabs)
const override;
173 std::vector<std::unique_ptr<ChildWithVariables>>
children;
180 std::pair<float, float>
evaluate(
const Filter &filter,
unsigned leaf_id,
EvalType eval_type)
const override;
187 unsigned max_height = 0;
188 for (
auto &child :
children) { max_height = std::max(max_height, child->child->height()); }
189 return 1 + max_height;
197 unsigned max_degree =
children.size();
198 for (
auto &child :
children) { max_degree = std::max(max_degree, child->child->degree()); }
202 std::size_t memory_used = 0;
203 memory_used +=
sizeof *
this +
children.size() *
sizeof(
decltype(
children)::value_type);
205 memory_used += child->variables.size() *
sizeof child->variables;
206 memory_used += child->child->memory_usage();
211 void print(std::ostream &out, std::size_t num_tabs)
const override;
228 bool operator<(
float other)
const {
return this->value < other; }
240 std::pair<float, float>
evaluate(
const Filter &bin_value,
unsigned leaf_id,
EvalType eval_type)
const override;
246 unsigned height()
const override {
return 0; }
247 unsigned breadth()
const override {
return 1; }
248 unsigned degree()
const override {
return 0; }
250 return sizeof *
this +
bins.size() *
sizeof(
decltype(
bins)::value_type);
252 void print(std::ostream &out, std::size_t num_tabs)
const override;
269 bool operator<(
float other)
const {
return this->upper_bound < other; }
278 std::vector<Bin>
bins,
291 std::pair<float, float>
evaluate(
const Filter &filter,
unsigned leaf_id,
EvalType eval_type)
const override;
297 unsigned height()
const override {
return 0; }
298 unsigned breadth()
const override {
return 1; }
299 unsigned degree()
const override {
return 0; }
301 return sizeof *
this +
bins.size() *
sizeof(
decltype(
bins)::value_type);
303 void print(std::ostream &out, std::size_t num_tabs)
const override;
327 LearningData &learning_data,
328 std::vector<SmallBitset> &column_candidates,
329 std::vector<SmallBitset> &variable_candidates
333 static std::unique_ptr<Spn::Sum>
create_sum(LearningData &learning_data);
336 static std::unique_ptr<Node>
learn_node(LearningData &learning_data);
348 static Spn learn_spn(Eigen::MatrixXf &data, Eigen::MatrixXi &null_matrix, std::vector<LeafType> &leaf_types);
379 void update_row(Eigen::VectorXf &old_row, Eigen::VectorXf &updated_row);
394 std::size_t memory_used = 0;
395 memory_used +=
sizeof root_;
396 memory_used +=
root_->memory_usage();
401 void dump(std::ostream &out)
const;
Implements a small and efficient set over integers in the range of 0 to 63 (including).
float cumulative_probability
the cumulative probability of this and all predecessor bins; in the range [0;1]
bool operator<(float other) const
float upper_bound
the upper bound of this bin
bool operator<(Bin &other) const
Bin(float upper_bound, float cumulative_probability)
unsigned height() const override
std::size_t estimate_number_distinct_values(unsigned id) const override
std::vector< Bin > bins
bins of this leaf
void print(std::ostream &out, std::size_t num_tabs) const override
std::pair< float, float > evaluate(const Filter &filter, unsigned leaf_id, EvalType eval_type) const override
Evaluate the SPN bottom up with a filter condition.
unsigned degree() const override
void update(Eigen::VectorXf &row, SmallBitset variables, UpdateType update_type) override
float null_probability
the probability of null values in this leaf
unsigned breadth() const override
float lower_bound
the lower bound of the first bin
std::size_t memory_usage() const override
ContinuousLeaf(std::vector< Bin > bins, float lower_bound, float lower_bound_probability, float null_probability, std::size_t num_rows)
float lower_bound_probability
probability of the lower_bound
bool operator<(Bin &other) const
float cumulative_probability
the cumulative probability of this and all predecessor bins; in the range [0;1]
Bin(float value, float cumulative_probability)
bool operator<(float other) const
float value
the value of this bin
unsigned breadth() const override
unsigned degree() const override
std::vector< Bin > bins
bins of this leaf
std::size_t memory_usage() const override
void update(Eigen::VectorXf &row, SmallBitset variables, UpdateType update_type) override
void print(std::ostream &out, std::size_t num_tabs) const override
std::pair< float, float > evaluate(const Filter &bin_value, unsigned leaf_id, EvalType eval_type) const override
Evaluate the SPN bottom up with a filter condition.
DiscreteLeaf(std::vector< Bin > bins, float null_probability, std::size_t num_rows)
std::size_t estimate_number_distinct_values(unsigned id) const override
float null_probability
the probability of null values in this leaf
unsigned height() const override
const Eigen::MatrixXf & data
const Eigen::MatrixXf & normalized
std::vector< LeafType > leaf_types
LearningData(const Eigen::MatrixXf &data, const Eigen::MatrixXf &normalized, const Eigen::MatrixXi &null_matrix, SmallBitset variables, std::vector< LeafType > leaf_types)
const Eigen::MatrixXi & null_matrix
Node(std::size_t num_rows)
virtual unsigned degree() const =0
virtual void update(Eigen::VectorXf &row, SmallBitset variables, UpdateType update_type)=0
virtual void print(std::ostream &out, std::size_t num_tabs) const =0
virtual std::size_t estimate_number_distinct_values(unsigned id) const =0
virtual std::pair< float, float > evaluate(const Filter &filter, unsigned leaf_id, EvalType eval_type) const =0
Evaluate the SPN bottom up with a filter condition.
virtual unsigned breadth() const =0
virtual std::size_t memory_usage() const =0
virtual unsigned height() const =0
ChildWithVariables(std::unique_ptr< Node > child, SmallBitset variables)
std::unique_ptr< Node > child
a child of the Product node
SmallBitset variables
the set of variables(attributes), that are in this child
unsigned breadth() const override
unsigned height() const override
std::pair< float, float > evaluate(const Filter &filter, unsigned leaf_id, EvalType eval_type) const override
Evaluate the SPN bottom up with a filter condition.
void print(std::ostream &out, std::size_t num_tabs) const override
void update(Eigen::VectorXf &row, SmallBitset variables, UpdateType update_type) override
Product(std::vector< std::unique_ptr< ChildWithVariables > > children, std::size_t num_rows)
unsigned degree() const override
std::size_t estimate_number_distinct_values(unsigned id) const override
std::size_t memory_usage() const override
std::vector< std::unique_ptr< ChildWithVariables > > children
float weight
weight of a child of a sum node
std::unique_ptr< Node > child
Eigen::VectorXf centroid
centroid of this child according to kmeans cluster
ChildWithWeight(std::unique_ptr< Node > child, float weight, Eigen::VectorXf centroid)
void update(Eigen::VectorXf &row, SmallBitset variables, UpdateType update_type) override
std::vector< std::unique_ptr< ChildWithWeight > > children
unsigned breadth() const override
Sum(std::vector< std::unique_ptr< ChildWithWeight > > children, std::size_t num_rows)
unsigned degree() const override
std::size_t estimate_number_distinct_values(unsigned id) const override
std::size_t memory_usage() const override
void print(std::ostream &out, std::size_t num_tabs) const override
std::pair< float, float > evaluate(const Filter &filter, unsigned leaf_id, EvalType eval_type) const override
Evaluate the SPN bottom up with a filter condition.
unsigned height() const override
Tree structure for Sum Product Networks.
static Spn learn_spn(Eigen::MatrixXf &data, Eigen::MatrixXi &null_matrix, std::vector< LeafType > &leaf_types)
Learn an SPN over the given data.
float lower_bound(const Filter &filter) const
Compute the lower bound probability for continuous domains.
void update_row(Eigen::VectorXf &old_row, Eigen::VectorXf &updated_row)
Update the SPN with the given row.
float likelihood(const Filter &filter) const
Compute the likelihood of the given filter predicates given by a map from attribute to the respective...
void delete_row(Eigen::VectorXf &row)
Delete the given row from the SPN.
float upper_bound(const Filter &filter) const
Compute the upper bound probability for continuous domains.
std::size_t estimate_number_distinct_values(unsigned attribute_id) const
Estimate the number of distinct values of the given attribute.
float expectation(unsigned attribute_id, const Filter &filter) const
Compute the expectation of the given attribute.
static std::unique_ptr< Spn::Product > create_product_min_slice(LearningData &learning_data)
Create a product node by splitting all columns.
std::size_t num_rows() const
returns the number of rows in the SPN.
void insert_row(Eigen::VectorXf &row)
Insert the given row into the SPN.
static std::unique_ptr< Node > learn_node(LearningData &learning_data)
Recursively learns the nodes of an SPN.
std::unique_ptr< Node > root_
std::unordered_map< unsigned, std::pair< SpnOperator, float > > Filter
Spn(std::size_t num_rows, std::unique_ptr< Node > root)
static std::unique_ptr< Spn::Sum > create_sum(LearningData &learning_data)
Create a sum node by clustering the rows.
void update(Eigen::VectorXf &row, UpdateType update_type)
Update the SPN from the top down and adjust weights of sum nodes and the distributions on leaves.
std::size_t memory_usage() const
LeafType
The different types of leaves for an attribute.
static std::unique_ptr< Product > create_product_rdc(LearningData &learning_data, std::vector< SmallBitset > &column_candidates, std::vector< SmallBitset > &variable_candidates)
Create a product node with the given candidates (vertical clustering)