1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
|
// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//
#include <BALL/FORMAT/commandlineParser.h>
#include <BALL/QSAR/registry.h>
#include <BALL/QSAR/featureSelection.h>
#include <BALL/QSAR/configIO.h>
#include <fstream>
#include "version.h"
using namespace BALL::QSAR;
using namespace BALL;
using namespace std;
// Forward declaration: the stream-based overload below delegates to this
// configuration-based overload, which is defined after it.
void startValidation(ValidationConfiguration& conf, QSARData* q, String* data_filename);
/**
 * Read one validation configuration from the given stream and execute it.
 *
 * The section is skipped (nothing is validated) if the configuration read
 * from the stream is flagged as 'done' or as 'for_visualization_only'.
 *
 * @param in             stream from which the validation configuration is read
 * @param q              passed through unchanged to the configuration-based overload
 * @param data_filename  passed through unchanged to the configuration-based overload
 */
void startValidation(ifstream& in, QSARData* q, String* data_filename)
{
	ValidationConfiguration conf = ConfigIO::readValidationConfiguration(&in);

	// A section that is already done or only meant for visualization is not
	// processed; the caller simply continues with the next section.
	const bool skip_section = (conf.done || conf.for_visualization_only==1);
	if(!skip_section)
	{
		startValidation(conf,q,data_filename);
	}
}
/**
 * Run the validation described by 'conf' and save its result to conf.output.
 *
 * The model is created according to the type abbreviation found in the
 * model file (conf.model) and then restored from that file. If conf.data is
 * non-empty, the data file is loaded (unless *data_filename says it is
 * already loaded) and the validation selected by conf.val_type is run:
 *   1 - testInputData
 *   2 - crossValidation with conf.k_folds
 *   3 - bootstrap with conf.bootstrap_samples
 *   4 - yRandomizationTest with conf.no_of_permutation_tests and conf.k_folds
 *   6 - calculateCoefficientStdErrors (regression models only)
 *   7 - load conf.validation_data, then testInputData
 *
 * @param conf           validation settings (model file, data file, output file, type, ...)
 * @param q              data container to use. If either q or data_filename is NULL,
 *                       a temporary QSARData object is created here instead and
 *                       deleted again before returning.
 * @param data_filename  in/out: name of the file whose contents are currently held
 *                       by *q; updated whenever a different file is loaded. May be NULL.
 */
void startValidation(ValidationConfiguration& conf, QSARData* q, String* data_filename)
{
	// Open the model file first: if it is missing we can bail out before
	// anything has been allocated, so the error path cannot leak.
	ifstream model_input(conf.model.c_str());
	if(!model_input)
	{
		Log.error()<<"Error: Model-file '"<<conf.model<<"' does not exist!!"<<endl;
		return;
	}

	// The model abbreviation is the first tab-separated field of the second
	// line of the model file; the first line is read and discarded.
	String model_type;
	getline(model_input,model_type);
	getline(model_input,model_type);
	model_type = model_type.getField(0,"\t");
	model_input.close();

	// Without both a data object and a filename to track its contents, work
	// on a private QSARData object that is owned (and freed) by this function.
	bool created_data_object=false;
	if(q==NULL || data_filename==NULL)
	{
		q = new QSARData;
		created_data_object=true;
	}

	Registry reg;
	RegistryEntry* entry = reg.getEntry(model_type);
	Model* m;
	if(!entry->kernel)
	{
		m = (*entry->create)(*q);
	}
	else
	{
		// parameters irrelevant; will be overwritten by those read from file
		m = (*entry->createKernel1)(*q,1,1, -1);
	}

	m->readFromFile(conf.model.c_str());
	m->model_val->selectStat(conf.statistic);

	if(conf.data!="")
	{
		// (re-)load the data file only if it is not the one already held by *q
		if(!data_filename || conf.data!=*data_filename)
		{
			q->readFromFile(conf.data);
			if(data_filename) *data_filename = conf.data;
		}

		if(conf.val_type==1) m->model_val->testInputData(1);
		else if(conf.val_type==2) m->model_val->crossValidation(conf.k_folds,1);
		else if(conf.val_type==3) m->model_val->bootstrap(conf.bootstrap_samples);
		else if(conf.val_type==4) m->model_val->yRandomizationTest(conf.no_of_permutation_tests,conf.k_folds);
		else if(conf.val_type==6)
		{
			// coefficient standard errors are only computed for regression models
			if(entry->regression)
			{
				((RegressionModel*)m)->validation->calculateCoefficientStdErrors(conf.bootstrap_samples);
			}
		}
		else if(conf.val_type==7)
		{
			if(!data_filename || conf.validation_data!=*data_filename)
			{
				q->readFromFile(conf.validation_data.c_str());
				// data_filename may legitimately be NULL (see the condition above),
				// so it must not be dereferenced unconditionally
				if(data_filename) *data_filename = conf.validation_data;
			}
			m->model_val->testInputData(1);
		}
	}

	// save the result of the validation to the specified file
	m->model_val->saveToFile(conf.output);

	if(created_data_object) delete q;
	delete m;
}
#ifndef EXT_MAIN
/**
 * Command-line entry point of the Validator tool.
 *
 * Parses the command line (model file, data file, output file, validation
 * type), fills a ValidationConfiguration from it using the Registry's
 * default sampling settings, and runs the validation.
 *
 * @return 0 after the validation has been started and has returned,
 *         1 if the given validation type is not recognized
 */
int main(int argc, char* argv[])
{
	CommandlineParser par("Validator","evaluate quality of a QSAR model ", VERSION, String(__DATE__), "QuEasy (QSAR)");
	par.registerMandatoryInputFile("i","input mod-file");
	par.registerMandatoryInputFile("dat","data-file");
	par.registerMandatoryOutputFile("o","output text file");
	// '-type' selects the validation technique, not a feature-selection method
	par.registerMandatoryStringParameter("type","validation type");
	String man = "Validator evaluates the quality of a QSAR model.\n\nThe validation technique to be used for this can selected by '-type'. As input this tools need a model-file as generate by InputReader or FeatureSelector and a data-file generated by InputReader containing the prediction data set. Note that the latter must contain response values so that predictions done by the supplied model can be compared to those values by the validation method.";
	par.setToolManual(man);

	// values offered for '-type'
	list<String> slist;
	slist.push_back("cross validation");
	slist.push_back("bootstrapping");
	slist.push_back("response permutation");
	slist.push_back("evaluate fit to test data");
	par.setParameterRestrictions("type",slist);
	par.setSupportedFormats("i","mod");
	par.setSupportedFormats("dat","dat");
	par.setSupportedFormats("o","txt");
	par.parse(argc,argv);

	// build the validation configuration from the command line; sampling
	// parameters are taken from the registry defaults
	Registry reg;
	ValidationConfiguration conf;
	conf.model = par.get("i");
	conf.data = par.get("dat");
	conf.output = par.get("o");
	conf.k_folds = reg.default_k;
	conf.bootstrap_samples = reg.default_no_boostrap_samples;
	conf.no_of_permutation_tests = reg.default_no_permutations;

	// map the textual type (with underscores or as listed above) to the
	// numeric val_type understood by startValidation()
	String type = par.get("type");
	if (type=="cross_validation" || type=="cross validation")
	{
		conf.val_type = 2;
	}
	else if (type=="bootstrapping")
	{
		conf.val_type = 3;
	}
	else if (type=="response_permutation" || type=="response permutation")
	{
		conf.val_type = 4;
	}
	else if (type=="simple_prediction" || type=="evaluate fit to test data")
	{
		conf.val_type = 1;
	}
	else
	{
		cerr << "Validation-type '"<<type<<"' unknown, possible choices are:"<<endl;
		cerr<<" "<<"cross_validation"<<endl;
		cerr<<" "<<"bootstrapping"<<endl;
		cerr<<" "<<"response_permutation"<<endl;
		cerr<<" "<<"simple_prediction = quality of predictions done for given predictions data set only (no sampling)"<<endl;
		// return instead of exit() so that the local objects are destroyed
		return 1;
	}

	// no shared data object: startValidation() creates and frees its own
	startValidation(conf,NULL,NULL);
	return 0;
}
#endif
|