1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
|
/*
* File: kmdsStruct.cpp
*
* Implements metric MDS (multi-dimensional scaling)
* for kmds kmers
*/
#include "kmds.hpp"
arma::mat metricMDS(const arma::mat& populationMatrix, const int dimensions, const unsigned int threads, const std::string& distances_file)
{
/*
* Metric MDS
*
* 1) P^2 -> matrix with elements which are distances squared
* 2) J = I - n^-1(II') - II' is a square matrix of ones
* 3) B = -0.5JP^2J
* 4) Decompose B into eigenvalues
* 5) MDS components = eigenvectors * eigenvalues
* 6) Normalise components
*/
const unsigned int matSize = populationMatrix.n_rows;
// Step 1)
arma::mat P = arma::square(dissimiliarityMatrix(populationMatrix, threads));
// If supplied as an optional parameter, write distance matrix to file
if (!distances_file.empty())
{
writeDistances(distances_file, P);
}
// Step 2)
arma::mat J = arma::eye<arma::mat>(matSize, matSize)
- 1/double(matSize)*arma::ones<arma::mat>(matSize, matSize);
// Step 3)
arma::mat B = -0.5 * J * P * J;
// Step 4)
arma::vec eigval;
arma::mat eigvec;
if (!arma::eig_sym(eigval, eigvec, B))
{
throw std::runtime_error("Could not calculate eigenvalues of B matrix in metric MDS");
}
// Step 5)
// Eigenvalues returned are sorted ascending, so want to reverse order
arma::mat mds = fliplr(eigvec * diagmat(sqrt(eigval)));
// Step 6)
// All values will lie in the interval [-1,1]
arma::mat norm_mds(matSize, dimensions);
for (int i = 0; i < dimensions; ++i)
{
if (pow(max(mds.col(i)), 2) > pow(min(mds.col(i)), 2))
{
norm_mds.col(i) = mds.col(i)/max(mds.col(i));
}
else
{
norm_mds.col(i) = mds.col(i)/fabs(min(mds.col(i)));
}
}
return norm_mds;
}
// Distance between all rows. 0/1 elements only
arma::mat dissimiliarityMatrix(const arma::mat& inMat, const unsigned int threads)
{
const unsigned int matSize = inMat.n_rows;
arma::mat dist = arma::zeros<arma::mat>(matSize, matSize);
// Time parallelisation
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
// Create queue for distance calculations
std::queue<std::future<std::vector<DistanceElement>>> distance_calculations;
// Set up number of jobs per thread
const unsigned long int num_calculations = 0.5*matSize*(matSize - 1);
const unsigned long int calc_per_thread = (unsigned long int)num_calculations / threads;
const unsigned int num_big_threads = num_calculations % threads;
// Keep track of row and column outside of outer loop
unsigned int row = 0;
unsigned int col = 0;
for (unsigned int thread_idx = 0; thread_idx < threads; ++thread_idx) // Loop over threads
{
// First 'big' threads have an extra job
unsigned long int thread_jobs;
if (distance_calculations.size() < num_big_threads)
{
thread_jobs = calc_per_thread + 1;
}
else
{
thread_jobs = calc_per_thread;
}
// Each thread is assigned elements to calculated scanning left to right,
// top to bottom on the upper triangle of the matrix
std::vector<DistanceElement> thread_elements;
thread_elements.reserve(thread_jobs);
for (unsigned int element = 0; element < calc_per_thread; ++element)
{
DistanceElement d;
d.row = row;
d.col = ++col;
thread_elements.push_back(d);
if (col + 1 == matSize)
{
++row;
col = row;
}
}
// Set the thread off
distance_calculations.push(std::async(std::launch::async, threadDistance, thread_elements, std::cref(inMat)));
}
// Wait for thread results to return, and put the results in the distance
// matrix
while(!distance_calculations.empty())
{
std::vector<DistanceElement> distance_elements = distance_calculations.front().get();
distance_calculations.pop();
for(std::vector<DistanceElement>::iterator it = distance_elements.begin() ; it < distance_elements.end(); ++it)
{
dist(it->row, it->col) = it->distance;
dist(it->col, it->row) = it->distance;
}
}
// Normalise by total k-mers
dist = dist / inMat.n_cols;
// Print time taken
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
std::chrono::duration<double> diff = std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
std::cerr << "Distance matrix calculated in: " << diff.count() << " s\n";
return dist;
}
std::vector<DistanceElement> threadDistance(std::vector<DistanceElement> element_list, const arma::mat& rectangular_matrix)
{
for(std::vector<DistanceElement>::iterator it = element_list.begin() ; it != element_list.end(); ++it)
{
it->distance = distanceFunction(rectangular_matrix.row(it->row), rectangular_matrix.row(it->col));
}
return element_list;
}
double distanceFunction(const arma::rowvec& vec_1, const arma::rowvec& vec_2)
{
// Slow
// return accu(abs(vec_1 - vec_2));
return dot(vec_1 - vec_2, vec_1 - vec_2);
}
|