1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
|
// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//
//
#include <BALL/QSAR/validation.h>
#include <BALL/QSAR/statistics.h>
#include <BALL/QSAR/Model.h>
#include <boost/random/mersenne_twister.hpp>
namespace BALL
{
namespace QSAR
{
// Creates a validation object operating on the model m.
// Only the pointer is stored; nothing is copied from the model here.
Validation::Validation(Model* m)
{
	// start with an empty (0 x 0) result matrix and validation_statistic_ set to 0
	yRand_results_.resize(0, 0);
	validation_statistic_ = 0;
	model_ = m;
}
// Nothing is released explicitly here; in particular model_ is not deleted.
Validation::~Validation() {}
// Returns the current value of validation_statistic_.
int Validation::getStat() const
{
	const int current_statistic = validation_statistic_;
	return current_statistic;
}
// Copies substance number current_line of the model's input data into row
// train_line of the model's training matrices (descriptor_matrix_ and Y_).
//
// If model_->descriptor_IDs_ is non-empty (feature selection has been done),
// only the descriptors whose index is listed there are copied, packed into
// consecutive columns; otherwise every descriptor is copied.
void Validation::setTrainingLine(int train_line, int current_line)
{
	// has feature selection been done?
	const bool feature_selection = !model_->descriptor_IDs_.empty();

	// next selected descriptor ID that still has to be matched
	std::multiset<unsigned int>::iterator selected = model_->descriptor_IDs_.begin();

	// column of the training row that is written next
	int target_column = 0;

	for (unsigned int column = 0; column < model_->data->descriptor_matrix_.size(); ++column)
	{
		if (feature_selection)
		{
			// all selected descriptors have been copied already
			if (selected == model_->descriptor_IDs_.end())
			{
				break;
			}
			// this descriptor is not the next selected one, so skip it
			if (*selected != column)
			{
				continue;
			}
			++selected;
		}

		model_->descriptor_matrix_(train_line, target_column) = model_->data->descriptor_matrix_[column][current_line];
		++target_column;
	}

	// copy all response values of the current substance
	for (unsigned int activity = 0; activity < model_->data->Y_.size(); ++activity)
	{
		model_->Y_(train_line, activity) = model_->data->Y_[activity][current_line];
	}
}
// Copies substance number current_line of the model's input data into entry
// test_line of the test set (test_substances_ and test_Y_).
//
// All descriptors are copied, not only the selected ones.
//
// If back_transform is set, each copied value x is replaced by
// x * t[i][1] + t[i][0], where t is descriptor_transformations_ for descriptors
// and y_transformations_ for response values. A back-transformation is skipped
// for whichever of the two has no stored coefficients, so that an empty
// transformation table is never indexed.
void Validation::setTestLine(int test_line, int current_line, bool back_transform)
{
	// only back-transform where coefficients are actually available
	const bool restore_descriptors = back_transform && model_->data->descriptor_transformations_.size() != 0;
	const bool restore_y = back_transform && model_->data->y_transformations_.size() != 0;

	// discard whatever was stored for this test line before
	test_substances_[test_line] = vector<double>();

	// COPY ENTIRE LINE!!, relevant descriptors will be automatically chosen by Model.getSubstanceVector(...) (called by Model.predict(...))
	for (unsigned int i = 0; i < model_->data->descriptor_matrix_.size(); i++)
	{
		double value = model_->data->descriptor_matrix_[i][current_line];
		if (restore_descriptors)
		{
			const double stddev = model_->data->descriptor_transformations_[i][1];
			value = value*stddev+model_->data->descriptor_transformations_[i][0];
		}
		test_substances_[test_line].push_back(value);
	}

	// set all y-values for current substance
	for (unsigned int i = 0; i < model_->data->Y_.size(); i++)
	{
		double value = model_->data->Y_[i][current_line];
		if (restore_y)
		{
			const double stddev = model_->data->y_transformations_[i][1];
			value = value*stddev+model_->data->y_transformations_[i][0];
		}
		test_Y_(test_line, i) = value;
	}
}
// Randomly permutes, in place, the values of every row of the input data's Y_
// (one row per response variable, one entry per substance).
//
// Each row is shuffled independently with a Fisher-Yates shuffle, so every
// position (including the last one) can receive any element. Rows with fewer
// than two entries are left untouched, which also avoids a modulo by zero.
void Validation::yRand()
{
	// seeded from the current time, so the permutation differs between calls
	boost::mt19937 rng(PreciseTime::now().getMicroSeconds());

	// the input data is modified directly, hence the const_cast
	QSARData* data = const_cast <QSARData*> (model_->data);

	for (unsigned int i = 0; i < data->Y_.size(); i++)
	{
		const unsigned int n = data->Y_[i].size();

		// position j receives a random element out of [j, n); the last position needs no draw
		for (unsigned int j = 0; j+1 < n; j++)
		{
			const unsigned int pos = j + rng() % (n-j); // exchange elements at pos and j
			const double tmp = data->Y_[i][pos];
			data->Y_[i][pos] = data->Y_[i][j];
			data->Y_[i][j] = tmp;
		}
	}
}
// Gives read-only access to yRand_results_.
// The matrix is 0 x 0 after construction.
const Eigen::MatrixXd& Validation::getYRandResults() const
{
	return this->yRand_results_;
}
}
}
|