mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
RDC.cpp
Go to the documentation of this file.
1#include "RDC.hpp"
2
3#include <Eigen/Dense>
5#include <vector>
6
7
8using namespace m;
9using namespace Eigen;
10
11
12/*======================================================================================================================
13 * Helper functions
14 *====================================================================================================================*/
15
16MatrixXf m::create_CDF_matrix(const MatrixXf &data)
17{
18 /* Allocate CDF matrix. */
19 MatrixXf CDF_matrix(data.rows(), data.cols() + 1); // +1 for y-intercepts in non-linear transformations in RDC
20
21 /* Allocate index vector and CDF vector. */
22 std::vector<unsigned> indices;
23 indices.resize(data.rows());
24
25 for (unsigned col_id = 0; col_id != data.cols(); ++col_id) {
26 /* Initialize index vector. */
27 M_insist(indices.size() == (unsigned long)(data.rows()));
28 std::iota(indices.begin(), indices.end(), 0);
29
30 /* Compute index vector that sorts the column. */
31 std::sort(indices.begin(), indices.end(), [col=data.col(col_id)](unsigned first, unsigned second) {
32 return col(first) < col(second);
33 });
34
35 /* Translate index vector to CDF. */
36 unsigned last_pos = data.rows() - 1;
37 float last_value = data(indices[last_pos], col_id);
38 unsigned num_same = 0;
39 for (unsigned i = data.rows(); i-->0;) {
40 if (data(indices[i], col_id) != last_value) {
41 last_value = data(indices[i], col_id);
42 last_pos -= num_same;
43 num_same = 0;
44 }
45 CDF_matrix(indices[i], col_id) = float(last_pos + 1) / data.rows();
46 ++num_same;
47 }
48 }
49
50 /* Add last, all-ones column for Y-intercept of non-liner transformations, used later for RDC computation. */
51 CDF_matrix.col(data.cols()) = VectorXf::Ones(data.rows());
52
53 return CDF_matrix;
54}
55
56namespace {
57
58template<typename URBG>
59MatrixXf create_w_b(float stddev, unsigned num_rows, unsigned k, URBG &&g)
60{
61 std::normal_distribution<float> normal_distribution(0.f, stddev);
62 MatrixXf w_b(num_rows, k);
63 for (unsigned col_id = 0; col_id != k; ++col_id) {
64 auto col = w_b.col(col_id);
65 for (unsigned row_id = 0; row_id != num_rows; ++row_id)
66 col(row_id) = normal_distribution(g);
67 }
68 return w_b;
69}
70
71}
72
73
74/*======================================================================================================================
75 * RDC
76 *====================================================================================================================*/
77
78float m::rdc(const Eigen::MatrixXf &X, const Eigen::MatrixXf &Y, unsigned k, float stddev)
79{
80 M_insist(X.rows() == Y.rows());
81
82 /* Estimate copula transformation by CDFs. */
83 MatrixXf CDFs_of_X = create_CDF_matrix(X);
84 MatrixXf CDFs_of_Y = create_CDF_matrix(Y);
85 M_insist(CDFs_of_X.rows() == X.rows());
86 M_insist(CDFs_of_Y.rows() == X.rows());
87 M_insist(CDFs_of_X.cols() == X.cols() + 1);
88 M_insist(CDFs_of_Y.cols() == Y.cols() + 1);
89
90 return rdc_precomputed_CDF(CDFs_of_X, CDFs_of_Y, k, stddev);
91}
92
93template<typename URBG>
94float m::rdc_precomputed_CDF(MatrixXf &CDFs_of_X, MatrixXf &CDFs_of_Y, unsigned k, float stddev, URBG &&g)
95{
96 unsigned num_rows = CDFs_of_X.rows();
97 M_insist(num_rows == CDFs_of_Y.rows());
98
99 /* Generate random non-linear projections. */
100 MatrixXf X_w_b = create_w_b(stddev, CDFs_of_X.cols(), k, g);
101 MatrixXf Y_w_b = create_w_b(stddev, CDFs_of_Y.cols(), k, g);
102 M_insist(X_w_b.rows() == CDFs_of_X.cols());
103 M_insist(Y_w_b.rows() == CDFs_of_Y.cols());
104 M_insist(X_w_b.cols() == k);
105 M_insist(Y_w_b.cols() == k);
106
107 /* Compute canonical correlations. */
108 MatrixXf concat_matrix(CDFs_of_X.rows(), 2 * k);
109 concat_matrix << (CDFs_of_X * X_w_b), (CDFs_of_Y * Y_w_b);
110 concat_matrix.array() = concat_matrix.array().sin();
111
112 /* Center concat matrix. */
113 const RowVectorXf means = concat_matrix.colwise().mean();
114 concat_matrix.rowwise() -= means;
115
116 /* Compute covariance matrix. */
117 MatrixXf covariance_matrix = concat_matrix.adjoint() * concat_matrix / float(num_rows - 1);
118
119 FullPivLU<MatrixXf> CXX(covariance_matrix.block(0, 0, k, k));
120 FullPivLU<MatrixXf> CYY(covariance_matrix.block(k, k, k, k));
121 MatrixXf CXY = covariance_matrix.block(0, k, k, k);
122 MatrixXf CYX = covariance_matrix.block(k, 0, k, k);
123
124 SelfAdjointEigenSolver<MatrixXf> eigensolver((CXX.inverse() * CXY) * (CYY.inverse() * CYX));
125
126 return sqrtf(eigensolver.eigenvalues().maxCoeff());
127}
#define X(Kind)
Definition: Operator.hpp:621
#define M_insist(...)
Definition: macro.hpp:129
auto indices
Definition: WasmDSL.hpp:2429
‍mutable namespace
Definition: Backend.hpp:10
Eigen::MatrixXf create_CDF_matrix(const Eigen::MatrixXf &data)
Creates a new matrix, mapping every cell of data to its position according to the cell's column's CDF...
float rdc(const Eigen::MatrixXf &X, const Eigen::MatrixXf &Y, unsigned k=5, float stddev=1.f/6.f)
Compute the Randomized Dependence Coefficient of two random variables.
Definition: RDC.cpp:78
float rdc_precomputed_CDF(Eigen::MatrixXf &CDFs_of_X, Eigen::MatrixXf &CDFs_of_Y, unsigned k=5, float stddev=1.f/6.f, URBG &&g=URBG())
Compute the RDC value without having to compute CDF matrices to avoid sorting the same data multiple ...