File: predictor.C

package info (click to toggle)
ball 1.4.3~beta1-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 318,984 kB
  • sloc: cpp: 346,579; ansic: 4,097; python: 2,664; yacc: 1,778; lex: 1,099; xml: 964; sh: 688; sql: 316; awk: 118; makefile: 108
file content (130 lines) | stat: -rw-r--r-- 3,237 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#include <fstream>
#include <BALL/QSAR/registry.h>
#include <BALL/QSAR/configIO.h>

using namespace BALL::QSAR;
using namespace BALL;


/**
 * Process one prediction section of a configuration stream.
 *
 * Reads a PredictionConfiguration from `in`, loads the data set named in it
 * (if necessary), restores the model stored in the configured model file,
 * evaluates the prediction quality on the data and appends the individual
 * predictions to the configured output file.
 *
 * @param in             configuration stream, positioned at the section to read
 * @param q              data container to use; if NULL a temporary QSARData is
 *                       created here and destroyed again before returning
 * @param data_filename  if non-NULL, name of the file currently held by `q`;
 *                       it is compared against the configured data file to
 *                       decide whether the data must be (re-)read, and is
 *                       updated to the configured name after reading
 *
 * If the model file cannot be opened, an error is printed to std::cout and
 * the function returns without writing any output.
 */
void startPrediction(std::ifstream& in, QSARData* q, String* data_filename)
{
	PredictionConfiguration conf = ConfigIO::readPredictionConfiguration(&in);

	if(conf.done) return; // stop processing this section

	// Remember whether the data object is owned by this function, so that it
	// can be released on EVERY exit path below.
	bool created_data_object = false;
	if(q==NULL || data_filename==NULL || conf.data!=*data_filename)
	{
		if(q==NULL)
		{
			q = new QSARData;
			created_data_object = true;
		}
		q->readFromFile(conf.data);
		if(data_filename) *data_filename = conf.data;
	}

	Registry reg;
	Model* m = NULL;
	String model_type;
	std::ifstream model_input(conf.model.c_str()); // read model-abbreviation
	if(!model_input)
	{
		std::cout<<"Error: Model-file '"<<conf.model<<"' does not exist!!"<<std::endl;
		// Do not leak the data object allocated above when bailing out early.
		if(created_data_object) delete q;
		return;
	}
	// The model abbreviation is the first tab-separated field of the SECOND
	// line of the model file; the first line is read and discarded.
	std::getline(model_input,model_type);
	std::getline(model_input,model_type);
	model_type = model_type.getField(0,"\t");
	model_input.close();

	// NOTE(review): assumes Registry::getEntry() never returns NULL for an
	// unknown abbreviation (e.g. reports the error itself) -- confirm.
	RegistryEntry* entry = reg.getEntry(model_type);
	bool regression = entry->regression;

	if(!entry->kernel)
	{
		m = (*entry->create)(*q);
	}
	else
	{
		// parameters irrelevant; will be overwritten by those read from file
		m = (*entry->createKernel1)(*q,1,1, -1);
	}

	m->readFromFile(conf.model.c_str()); // do NOT train again (done by ModelCreator) !!

	m->model_val->selectStat(conf.statistic);
	m->model_val->testInputData(1);	// calculate prediction quality
	m->model_val->setCVRes(m->model_val->getFitRes());
	m->model_val->saveToFile(conf.output);

	// Append the per-substance predictions to the file written just above.
	std::ofstream out(conf.output.c_str(),std::ios::app);
	out<<std::endl<<"[Predictions]"<<std::endl;
	int no_act = q->getNoResponseVariables();
	int no_cols = no_act;
	if(conf.print_expected)
	{
		// each predicted value is followed by its expected value
		no_cols*=2;
		// (header text is emitted verbatim, spelling included, so that the
		// produced files stay byte-compatible with earlier versions)
		out<<"# format: predition0, expectation0, ..."<<std::endl;
	}
	out<<"expected_values = "<<conf.print_expected<<std::endl;
	out<<"dimensions = "<<q->getNoSubstances()<<" "<<no_cols<<std::endl;

	for(int i=0;i<q->getNoSubstances();i++)
	{
		vector<double>* v = q->getSubstance(i); // get UNcentered descriptor-vector of test compound

		Eigen::VectorXd res = m->predict(*v,1); // transform val. data according to centering of training data
		delete v;

		vector<double>* exp = q->getActivity(i); // get UNcentered response value vector
		for(int j=0; j<res.rows();j++)
		{
			out<<res[j]<<"\t";
			if(conf.print_expected)
			{
				out<<(*exp)[j]<<"\t";
			}
		}
		delete exp;
		out<<std::endl;
	}

	if(created_data_object) delete q;
	delete m;
}


#ifndef EXT_MAIN
/**
 * Command-line entry point.
 *
 * Expects the path of a configuration file as first argument, scans that
 * file for lines starting with "[Predictor]" and runs startPrediction()
 * once for every such section.
 *
 * @return always 0; problems are reported on std::cout
 */
int main(int argc, char* argv[])
{
	if(argc<2)
	{
		std::cout<<"Please specify configuration file!"<<std::endl;
		return 0;
	}

	std::ifstream in(argv[1]);
	if(!in)
	{
		std::cout<<"Configuration file '"<<argv[1]<<"' not found!"<<std::endl;
		return 0;
	}

	String line;

	// Read ALL Predictor sections. The loop is driven by the overall stream
	// state rather than by eof() alone: if the stream fails without reaching
	// end-of-file, getline() would no longer modify 'line', and the stale
	// "[Predictor]" header would be processed over and over again.
	while(in.good())
	{
		// skip everything until the beginning of the next Predictor section
		bool found_section = false;
		while(std::getline(in,line))
		{
			if(line.hasPrefix("[Predictor]"))
			{
				found_section = true;
				break;
			}
		}
		if(!found_section) break; // there are no (more) Predictor sections!

		// hand the header line back to the stream so that the configuration
		// reader sees the complete section
		ConfigIO::putbackLine(&in,line);

		startPrediction(in,NULL,NULL);
	}

	return 0;
}
#endif