File: Validator.C

package info (click to toggle)
ball 1.5.0%2Bgit20180813.37fc53c-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 239,888 kB
  • sloc: cpp: 326,149; ansic: 4,208; python: 2,303; yacc: 1,778; lex: 1,099; xml: 958; sh: 322; makefile: 95
file content (173 lines) | stat: -rw-r--r-- 4,948 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//

#include <BALL/FORMAT/commandlineParser.h>
#include <BALL/QSAR/registry.h>
#include <BALL/QSAR/featureSelection.h>
#include <BALL/QSAR/configIO.h>
#include <fstream>
#include "version.h"

using namespace BALL::QSAR;
using namespace BALL;
using namespace std;

// Forward declaration: the stream-based overload defined below calls this
// configuration-based overload before its definition appears in the file.
void startValidation(ValidationConfiguration& conf, QSARData* q, String* data_filename);


/**
 * Reads one validation configuration from the given stream and, unless that
 * section is to be skipped, runs the validation it describes.
 *
 * @param in             stream from which the validation configuration is read
 * @param q              data object handed through to the configuration-based overload
 * @param data_filename  file name handed through to the configuration-based overload
 */
void startValidation(ifstream& in, QSARData* q, String* data_filename)
{
	ValidationConfiguration conf = ConfigIO::readValidationConfiguration(&in);

	// A section marked as done, or flagged as for-visualization-only, is not
	// processed; the caller simply continues with the next section.
	const bool skip_section = (conf.done || conf.for_visualization_only==1);

	if(!skip_section)
	{
		startValidation(conf,q,data_filename);
	}
}


/**
 * Runs the validation described by 'conf' on the model stored in conf.model
 * and saves the validation result to conf.output.
 *
 * @param conf           validation settings (model file, data file(s), validation
 *                       type, statistic, sampling parameters, output file)
 * @param q              data object to work on. If 'q' or 'data_filename' is NULL,
 *                       a temporary QSARData object is created here and deleted
 *                       again before returning.
 * @param data_filename  if not NULL: name of the data file compared against the
 *                       requested one to decide whether data has to be (re-)read;
 *                       it is updated whenever a file is read into 'q'.
 *
 * If the model file cannot be opened, an error is logged and nothing is validated.
 */
void startValidation(ValidationConfiguration& conf, QSARData* q, String* data_filename)
{
	bool created_data_object=0;
	if(q==NULL || data_filename==NULL)
	{
		q = new QSARData;
		created_data_object=1;
	}

	Registry reg;
	Model* m;
	String model_type;

	ifstream model_input(conf.model.c_str()); // read model-abbreviation
	if(!model_input)
	{
		Log.error()<<"Error: Model-file '"<<conf.model<<"' does not exist!!"<<endl;
		// do not leak the temporary data object on this early exit
		if(created_data_object) delete q;
		return;
	}
	// the model abbreviation is the first tab-separated field of the second line
	getline(model_input,model_type);
	getline(model_input,model_type);
	model_type = model_type.getField(0,"\t");
	model_input.close();

	RegistryEntry* entry = reg.getEntry(model_type);

	if(!entry->kernel)
	{
		m = (*entry->create)(*q);
	}
	else
	{
		// parameters irrelevant; will be overwritten by those read from file
		m = (*entry->createKernel1)(*q,1,1, -1);
	}

	m->readFromFile(conf.model.c_str());
	m->model_val->selectStat(conf.statistic);

	if(conf.data!="")
	{
		// only (re-)read the data if it is not known to be the currently loaded file
		if(!data_filename || conf.data!=*data_filename)
		{
			q->readFromFile(conf.data);
			if(data_filename) *data_filename = conf.data;
		}

		// 1: evaluate predictions for the given data
		if(conf.val_type==1) m->model_val->testInputData(1);

		// 2: cross validation
		else if(conf.val_type==2) m->model_val->crossValidation(conf.k_folds,1);

		// 3: bootstrapping
		else if(conf.val_type==3) m->model_val->bootstrap(conf.bootstrap_samples);

		// 4: response permutation (y-randomization) test
		else if(conf.val_type==4) m->model_val->yRandomizationTest(conf.no_of_permutation_tests,conf.k_folds);

		// 6: standard errors of coefficients; done for regression models only
		else if(conf.val_type==6)
		{
			if(entry->regression)
			{
				((RegressionModel*)m)->validation->calculateCoefficientStdErrors(conf.bootstrap_samples);
			}
		}

		// 7: evaluate predictions for a separate validation data file
		else if(conf.val_type==7)
		{
			if(!data_filename || conf.validation_data!=*data_filename)
			{
				q->readFromFile(conf.validation_data.c_str());
				// data_filename may be NULL in this branch, so guard the write-back
				if(data_filename) *data_filename = conf.validation_data;
			}
			m->model_val->testInputData(1);
		}
	}

	// save the result of the validation to the specified file
	m->model_val->saveToFile(conf.output);

	if(created_data_object) delete q;
	delete m;
}


#ifndef EXT_MAIN
/**
 * Command-line entry point of the Validator tool.
 *
 * Registers and parses the parameters (-i model file, -dat data file,
 * -o output file, -type validation type), translates the chosen type into
 * the numeric validation type of a ValidationConfiguration, fills the
 * sampling parameters with the Registry defaults and starts the validation.
 * An unknown validation type prints the possible choices and exits with status 1.
 */
int main(int argc, char* argv[])
{
	CommandlineParser par("Validator","evaluate quality of a QSAR model ", VERSION, String(__DATE__), "QuEasy (QSAR)");
	par.registerMandatoryInputFile("i","input mod-file");
	par.registerMandatoryInputFile("dat","data-file");
	par.registerMandatoryOutputFile("o","output text file");
	// this parameter selects the validation technique, not a feature-selection method
	par.registerMandatoryStringParameter("type","validation type");


	String man = "Validator evaluates the quality of a QSAR model.\n\nThe validation technique to be used for this can selected by '-type'. As input this tools need a model-file as generate by InputReader or FeatureSelector and a data-file generated by InputReader containing the prediction data set. Note that the latter must contain response values so that predictions done by the supplied model can be compared to those values by the validation method.";
	par.setToolManual(man);
	list<String> slist;
	slist.push_back("cross validation");
	slist.push_back("bootstrapping");
	slist.push_back("response permutation");
	slist.push_back("evaluate fit to test data");
	par.setParameterRestrictions("type",slist);
	par.setSupportedFormats("i","mod");
	par.setSupportedFormats("dat","dat");
	par.setSupportedFormats("o","txt");
	par.parse(argc,argv);


	// sampling parameters are not exposed on the command line; use the registry defaults
	Registry reg;
	ValidationConfiguration conf;
	conf.model = par.get("i");
	conf.data = par.get("dat");
	conf.output = par.get("o");
	conf.k_folds = reg.default_k;
	conf.bootstrap_samples = reg.default_no_boostrap_samples;
	conf.no_of_permutation_tests = reg.default_no_permutations;

	// both the underscore spelling and the spelling with blanks are accepted
	String type = par.get("type");
	if (type=="cross_validation" || type=="cross validation")
	{
		conf.val_type = 2;
	}
	else if (type=="bootstrapping")
	{
		conf.val_type = 3;
	}
	else if (type=="response_permutation" || type=="response permutation")
	{
		conf.val_type = 4;
	}
	else if (type=="simple_prediction" || type=="evaluate fit to test data")
	{
		conf.val_type = 1;
	}
	else
	{
		cerr << "Validation-type '"<<type<<"' unknown, possible choices are:"<<endl;
		cerr<<"   "<<"cross_validation"<<endl;
		cerr<<"   "<<"bootstrapping"<<endl;
		cerr<<"   "<<"response_permutation"<<endl;
		cerr<<"   "<<"simple_prediction   = quality of predictions done for given predictions data set only (no sampling)"<<endl;
		exit(1);
	}

	// no pre-loaded data object / file name: startValidation creates its own data object
	startValidation(conf,0,0);
	return 0;
}
#endif