File: pcrModel.C

package info (click to toggle)
ball 1.5.0%2Bgit20180813.37fc53c-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 239,848 kB
  • sloc: cpp: 326,149; ansic: 4,208; python: 2,303; yacc: 1,778; lex: 1,099; xml: 958; sh: 322; makefile: 93
file content (110 lines) | stat: -rw-r--r-- 2,701 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//
// 

#include <BALL/QSAR/pcrModel.h>
#include <Eigen/Dense>

namespace BALL
{
	namespace QSAR
			{

		PCRModel::PCRModel(const QSARData& q, double frac_var) : LinearModel(q) 
		{
			type_="PCR";
			frac_var_ = frac_var;
		}

		PCRModel::~PCRModel()
		{
		}

		void PCRModel::setFracVar(double frac_var)
		{
			frac_var_ = frac_var;
		}

		void PCRModel::calculateEigenvectors(const Eigen::MatrixXd& data, double frac_var, Eigen::MatrixXd& output)
		{
			Eigen::JacobiSVD<Eigen::MatrixXd> svd;
			svd.compute(data, Eigen::ComputeThinV);

			// find the smallest singular vector that should be taken into account
			// complete variance == sum of all eigen-values == sum of squared singular values
			double complete_var = svd.singularValues().sum();

			double explained_var = 0;
			unsigned int cols = 0;
			if (complete_var < 5*std::numeric_limits < double > ::epsilon())
			{
				throw Exception::NoPCAVariance(__FILE__, __LINE__, "No variance present to be explained by PCA!");
			}

			unsigned int last_vector = 0;
			for (; last_vector < svd.singularValues().rows() && cols < data.rows() && explained_var/complete_var < frac_var ; last_vector++)
			{
				// (singular-value)^2 == eigen-value
				explained_var += svd.singularValues()[last_vector];
				cols++;
			}

			output.resize(data.rows(), cols);

			for (unsigned int i = 0; i < data.rows(); i++)
			{
				for (unsigned int j = 0; j < last_vector ; j++)
				{
					output(i, j) = svd.matrixV()(i, j);
				}
			}
		}


		void PCRModel::train()
		{
			if (descriptor_matrix_.cols() == 0)
			{
				throw Exception::InconsistentUsage(__FILE__, __LINE__, "Data must be read into the model before training!"); 
			}
			Eigen::MatrixXd XX = descriptor_matrix_.transpose()*descriptor_matrix_;
			
			calculateEigenvectors(XX, frac_var_, loadings_);

			latent_variables_ = descriptor_matrix_*loadings_;
			
			RRModel m(*data);
			m.descriptor_matrix_ = latent_variables_;
			m.Y_ = Y_;
			m.train();

			//result of RR is a linear combination of latente variables 
			// = column with length = no of latente variables = > matrix for more than one modelled activity
			weights_ = *m.getTrainingResult();
			training_result_ = loadings_*weights_;
			
			calculateOffsets();
		}


		void PCRModel::setParameters(vector<double>& v)
		{
			if (v.size() != 1)
			{
				String c = "Wrong number of model parameters! Needed: 1;";
				c = c+" given: "+String(v.size());
				throw Exception::ModelParameterError(__FILE__, __LINE__, c.c_str());
			}
			frac_var_ = v[0];
		}


		vector<double> PCRModel::getParameters() const
		{
			vector<double> d;
			d.push_back(frac_var_);
			return d;
		}
	}
}