1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
|
#ifndef __PCA__HPP__
#define __PCA__HPP__
#include <iostream>
#include <unordered_set>
#include "Eigen/Dense"
class PCA {
public:
PCA(Eigen::MatrixXd& m) { //Eigen::Map<Eigen::MatrixXd>& m) {
dat_ = m;
/*
dat_ = Eigen::MatrixXd(m.rows(), m.cols());
for(size_t i = 0; i < m.rows(); ++i) {
for (size_t j = 0; j < m.cols(); ++j) {
dat_(i,j) = m(i,j);
}
}
*/
}
void performDecomposition(bool standardize=true) {
if (standardize) {
std::unordered_set<uint32_t> droppedCols;
size_t nrows = dat_.rows();
Eigen::VectorXd means(dat_.cols());
means = dat_.colwise().mean();
Eigen::VectorXd stdDev(dat_.cols());
decltype(dat_.cols()) i, j;
for (i = 0; i < dat_.cols(); ++i) {
for (j = 0; j < dat_.rows(); ++j) {
dat_(j,i) -= means(i);
}
auto x = dat_.col(i).dot(dat_.col(i));
auto sd = std::sqrt(x / nrows);
if (sd < 1e-20) {
droppedCols.insert(i);
} else {
dat_.col(i) /= sd;
}
}
// If we dropped any columns for having a "0" standard deviation,
// then create a new normalized data matrix without them.
if (droppedCols.size() > 0) {
Eigen::MatrixXd tmpDat(dat_.rows(), dat_.cols() - droppedCols.size());
size_t curCol{0};
decltype(dat_.cols()) i;
for (i = 0; i < dat_.cols(); ++i) {
if (droppedCols.find(i) == droppedCols.end()) {
tmpDat.col(curCol) = dat_.col(i);
++curCol;
}
}
std::swap(dat_, tmpDat);
}
}
//std::cerr << "DATA = " << dat_ << "\n";
Eigen::JacobiSVD<Eigen::MatrixXd> svd(dat_,
Eigen::ComputeThinU |
Eigen::ComputeThinV);
proj_ = svd.matrixV();
variances_ = svd.singularValues();
//std::cerr << "V = " << proj_ << "\n";
//std::cerr << "S = " << variances_;
}
uint32_t varFrac(double frac, bool verbose=false) {
uint32_t cutoff{0};
bool haveCutoff{false};
if (verbose) {
std::cerr << "variances: " << variances_ << "\n";
}
Eigen::VectorXd fracs(variances_.rows());
double totalVar{0.0};
for (uint32_t i = 0; i < variances_.rows(); ++i) {
variances_(i) = variances_(i) * variances_(i);
totalVar += variances_(i);
}
double partialSum{0.0};
for (uint32_t i = 0; i < variances_.rows(); ++i) {
partialSum += variances_(i);
fracs(i) = partialSum / totalVar;
if (!haveCutoff and fracs(i) >= frac) {
cutoff = i;
haveCutoff = true;
}
}
if (verbose) {
std::cerr << "fracs = " << fracs << "\n";
std::cerr << "include the first " << cutoff + 1 << " components to reach cutoff of " << frac << "\n";
}
return cutoff + 1;
}
Eigen::MatrixXd projectedData(double frac, bool verbose=false) {
uint32_t numVec = varFrac(frac, verbose);
return dat_ * proj_.block(0, 0, proj_.rows(), numVec);
}
private:
Eigen::MatrixXd dat_;
Eigen::MatrixXd proj_;
Eigen::VectorXd variances_;
};
#endif //__PCA__HPP__
|