1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
|
// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//
#include <BALL/FORMAT/commandlineParser.h>
#include <BALL/QSAR/registry.h>
#include <BALL/QSAR/configIO.h>
#include <fstream>
#include "version.h"
using namespace BALL::QSAR;
using namespace BALL;
using namespace std;
int main(int argc, char* argv[])
{
CommandlineParser par("MolPredictor","predict molecule activities with QSAR model", VERSION, String(__DATE__), "QuEasy (QSAR)");
par.registerMandatoryInputFile("i","input sd-file");
par.registerMandatoryInputFile("mod","file containing QSAR model");
par.registerMandatoryOutputFile("o","output sd-file");
par.registerOptionalInputFile("csv","input csv-file w/ additional descriptors");
par.registerOptionalIntegerParameter("csv_nr","no. of response variables in csv-file");
par.registerFlag("sdp","use sd-properties as additional descriptors");
par.registerFlag("csv_cl","csv-file has compound (row) labels");
par.registerFlag("csv_dl","csv-file has descriptor (column) labels");
par.registerOptionalIntegerParameter("csv_sep","separator symbol in csv-file");
par.registerFlag("rm", "remove input sd-file when finished");
String man = "This tool predictes the response values of compounds in the given molecule file using the specified QSAR model.\n\nInput of this tool is a molecule file (sdf,mol2,drf) and a model-file as generated by ModelCreator or FeatureSelector.\nFeatures for all molecules in the input file are generated automatically. However, if you used an additional, externally generated feature-set to generate your QSAR model, make sure to generate features in the same manner (i.e. using the same external tool with the same settings) for the molecule file to be used here and specify the csv-file with the above options.\n\nOutput of this tool (as specified by '-o') is a molecule file containing the predicted values as a property tag named 'predicted_activity'.";
par.setToolManual(man);
par.setSupportedFormats("i","sdf");
par.setSupportedFormats("mod","mod");
par.setSupportedFormats("csv","csv");
par.setSupportedFormats("o","sdf,txt");
par.parse(argc,argv);
string input = par.get("i");
string model_file = par.get("mod");
string output = par.get("o");
String csv = "";
String s = par.get("csv");
if (s!=CommandlineParser::NOT_FOUND) csv = s;
int csv_no_response = 0;
s = par.get("csv_nr");
if (s!=CommandlineParser::NOT_FOUND) csv_no_response = s.toInt();
bool csv_desc_labels = 0;
s = par.get("csv_dl");
if (s!=CommandlineParser::NOT_FOUND) csv_desc_labels = s.toInt();
bool csv_compound_labels = 0;
s = par.get("csv_cl");
if (s!=CommandlineParser::NOT_FOUND) csv_compound_labels = s.toInt();
String csv_separator = "\t";
s = par.get("csv_sep");
if (s!=CommandlineParser::NOT_FOUND) csv_separator = s;
bool read_sd_descriptors = 0;
s = par.get("sdp");
if(s!=CommandlineParser::NOT_FOUND) read_sd_descriptors = s.toBool();
bool txt_output=0;
if(output.size()>4 && output.substr(output.size()-4)==".txt")
{
txt_output=true;
}
/// read model from given file
QSARData q;
Model* m = createNewModelFromFile(model_file,q);
/// read molecules and descriptors
set<String> act;
Log.level(5) << "Will now read input-file and generate features ... "; Log.flush();
q.readSDFile(input.c_str(),act,read_sd_descriptors,0,0,1,1);
Log.level(5) << "done. " << endl << flush;
if(csv!="") q.readCSVFile(csv.c_str(),0,csv_desc_labels,csv_compound_labels,csv_separator.c_str(),csv_no_response);
/// predict acitivity of each molecule in the SD-file
vector<double> activities(q.getNoSubstances());
Size total = q.getNoSubstances();
for(unsigned int i=0;i<total;i++)
{
if(i%50==0)
{
Log.level(5) << "\tPredicting activities of compounds: " << (i+1) << "/" << total;
Log.flush();
}
vector<double>* v = q.getSubstance(i); // get UNcentered descriptor-vector of test compound
Eigen::VectorXd res = m->predict(*v,1); // transform val. data according to centering of training data
delete v;
activities[i] = res[0];
}
Log.level(5) << "\tPredicting activities of compounds: " << total << "/" << total << endl;
/// write molecules with appended activity-labels to output sd-file
if(!txt_output)
{
SDFile sd_in(input);
SDFile sd_out(output,ios::out);
Molecule* mol;
uint no_pos=0;
for(uint i=0; (mol=sd_in.read()); i++)
{
if(i%50==0)
{
Log.level(5) << "\tSaving compounds: " << (i+1) << "/" << total; Log.flush();
}
mol->setProperty("predicted_activity",activities[i]);
no_pos++;
sd_out << *mol;
delete mol;
}
Log.level(5) << "\tSaving compounds: " << total << "/" << total << endl;
}
else
{
ofstream out(output.c_str());
for(unsigned int i=0; i<activities.size(); i++)
{
out<<"mol_"<<i<<"\t"<<activities[i]<<endl;
}
}
delete m;
if (par.has("rm"))
{
File::remove(par.get("i"));
}
}
|