File: Predictor.C

package info (click to toggle)
ball 1.5.0%2Bgit20180813.37fc53c-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 239,848 kB
  • sloc: cpp: 326,149; ansic: 4,208; python: 2,303; yacc: 1,778; lex: 1,099; xml: 958; sh: 322; makefile: 93
file content (136 lines) | stat: -rwxr-xr-x 4,135 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//

#include <BALL/FORMAT/commandlineParser.h>
#include <BALL/QSAR/registry.h>
#include <BALL/QSAR/configIO.h>
#include <fstream>
#include "version.h"

using namespace BALL::QSAR;
using namespace BALL;
using namespace std;

void startPrediction(PredictionConfiguration& conf, QSARData* q, String* data_filename);


/**
 * Read one prediction section from an already opened configuration stream
 * and run the prediction it describes.
 *
 * @param in            configuration stream handed to ConfigIO::readPredictionConfiguration()
 * @param q             data object forwarded unchanged to the configuration-based overload (may be NULL)
 * @param data_filename forwarded unchanged to the configuration-based overload (may be NULL)
 */
void startPrediction(ifstream& in, QSARData* q, String* data_filename)
{
	PredictionConfiguration section_conf = ConfigIO::readPredictionConfiguration(&in);

	// A configuration flagged as 'done' means this section must not be processed.
	if(!section_conf.done)
	{
		startPrediction(section_conf, q, data_filename);
	}
}


/**
 * Apply a stored QSAR model to a data set and write the results to conf.output.
 *
 * The data set is (re)read from conf.data whenever q is NULL, data_filename is NULL
 * or conf.data differs from *data_filename. The model type is taken from the first
 * tab-separated field of the second line of the model file; the matching registry
 * entry is used to create the model, which is then restored from the model file.
 * The validation result is saved to conf.output and a "[Predictions]" section with
 * one line per substance is appended to the same file.
 *
 * @param conf          prediction settings; the fields read here are data, model,
 *                      output, statistic and print_expected
 * @param q             data object to predict on; if NULL, a temporary QSARData is
 *                      allocated here and deleted again before returning
 * @param data_filename if non-NULL, compared with conf.data to decide whether the data
 *                      has to be read, and set to conf.data after reading
 */
void startPrediction(PredictionConfiguration& conf, QSARData* q, String* data_filename)
{
	// remembers whether q was allocated here and therefore has to be freed here
	bool created_data_object=0;
	if(q==NULL || data_filename==NULL || conf.data!=*data_filename)
	{
		if(q==NULL)
		{
			q = new QSARData;
			created_data_object=1;
		}
		q->readFromFile(conf.data);
		if(data_filename) *data_filename = conf.data;
	}

	Registry reg;
	Model* m;
	String model_type;
	ifstream model_input(conf.model.c_str()); // read model-abbreviation
	if(!model_input)
	{
		Log.error()<<"Error: Model-file '"<<conf.model<<"' does not exist!!"<<endl;
		// do not leak the data object that was allocated above
		if(created_data_object) delete q;
		return;
	}
	// the first line is skipped; the abbreviation is the first tab-separated field of the second line
	getline(model_input,model_type);
	getline(model_input,model_type);
	model_type = model_type.getField(0,"\t");
	model_input.close();

	RegistryEntry* entry = reg.getEntry(model_type);

	if(!entry->kernel)
	{
		m = (*entry->create)(*q);
	}
	else
	{
		// parameters irrelevant; will be overwritten by those read from file
		m = (*entry->createKernel1)(*q,1,1, -1);
	}

	m->readFromFile(conf.model.c_str()); // do NOT train again (done by ModelCreator) !!

	m->model_val->selectStat(conf.statistic);
	m->model_val->testInputData(1);	// calculate prediction quality
	m->model_val->setCVRes(m->model_val->getFitRes());
	m->model_val->saveToFile(conf.output);

	// append the individual predictions to the file written by saveToFile() above
	ofstream out(conf.output.c_str(),ios::app);
	out<<endl<<"[Predictions]"<<endl;
	int no_act = q->getNoResponseVariables();
	int no_cols = no_act;
	if(conf.print_expected)
	{
		// every prediction is followed by its expected value, doubling the column count
		no_cols*=2;
		out<<"# format: predition0, expectation0, ..."<<endl;
	}
	out<<"expected_values = "<<conf.print_expected<<endl;
	out<<"dimensions = "<<q->getNoSubstances()<<" "<<no_cols<<endl;

	for(unsigned int i=0;i<q->getNoSubstances();i++)
	{
		vector<double>* v = q->getSubstance(i); // get UNcentered descriptor-vector of test compound

		Eigen::VectorXd res = m->predict(*v,1); // transform val. data according to centering of training data
		delete v;

		vector<double>* exp = q->getActivity(i); // get UNcentered response value vector
		// Eigen vectors are indexed from 0 (the former MATH/LinAlg vectors were indexed from 1),
		// so valid coefficients are res(0) ... res(rows()-1).
		for(int j=0; j<res.rows(); j++)
		{
			out<<res(j)<<"\t";
			if(conf.print_expected)
			{
				out<<(*exp)[j]<<"\t";
			}
		}
		delete exp;
		out<<endl;
	}

	if(created_data_object) delete q;
	delete m;
}


#ifndef EXT_MAIN
int main(int argc, char* argv[])
{
	CommandlineParser par("Predictor","predict activities with QSAR model", VERSION ,String(__DATE__), "QuEasy (QSAR)");
	par.registerMandatoryInputFile("i","input mod-file");
	par.registerMandatoryInputFile("dat","data-file containing prediction data set");
	par.registerMandatoryOutputFile("o","output text file");

	String man = "This tool predictes the response values of compounds in the given data-file using the specified QSAR model.\n\nInput of this tool is a model-file as generated by ModelCreator or FeatureSelector and a data-file generated by InputReader.\n\nOutput of this tool (as specified by '-o') is a text file containing the predicted and, if any, the expected response values in one column each.\nIf you would prefer to use molecule files (sdf,mol2,drf) for input and output, please use the tool MolPredictor instead of this one.";
	par.setToolManual(man);
	par.setSupportedFormats("i","mod");
	par.setSupportedFormats("dat","dat");
	par.setSupportedFormats("o","txt");
	par.parse(argc,argv);

	Registry reg;
	PredictionConfiguration conf;
	conf.model = par.get("i");
	conf.data = par.get("dat");
	conf.output = par.get("o");
	conf.print_expected = true;

	startPrediction(conf,0,0);
}
#endif