File: AntitargetRescorer.C

package info (click to toggle)
ball 1.5.0%2Bgit20180813.37fc53c-11
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 239,924 kB
  • sloc: cpp: 326,149; ansic: 4,208; python: 2,303; yacc: 1,778; lex: 1,099; xml: 958; sh: 322; javascript: 164; makefile: 88
file content (144 lines) | stat: -rwxr-xr-x 5,219 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//

#include <BALL/FORMAT/molFileFactory.h>
#include <BALL/FORMAT/genericMolFile.h>
#include <BALL/KERNEL/molecule.h>
//#include <BALL/DOCKING/COMMON/dockingAlgorithm.h>
#include <BALL/DOCKING/COMMON/flexibleMolecule.h>
#include <BALL/FORMAT/commandlineParser.h>
#include "version.h"

using namespace BALL;
using namespace std;

double calculateMedian(HashMap<String, double>& scores)
{
	multiset<double> list;
	for (HashMap < String, double > ::iterator it = scores.begin(); it != scores.end(); it++)
	{
		if (it->second < 100)
		{
			list.insert(it->second);
		}
	}
	int size = list.size();
	int i = 0;
	multiset<double>::iterator it = list.begin();
	for (; i < size/2 && it != list.end(); it++, i++)
	{
	}
	return *it;
}

int main(int argc, char* argv[])
{
	CommandlineParser parpars("AntitargetRescorer", "rescore w/ anti-target dock-results", VERSION, String(__DATE__), "Rescoring");
	parpars.registerMandatoryInputFile("t", "input file w/ target dock-results");
	parpars.registerMandatoryInputFile("at", "input file w/ anti-target dock-results");
	parpars.registerMandatoryOutputFile("o", "output file");
	String manual = "This tool rescores docking output poses.\nAntitargetRescoring can be used to try to enhance target specificity. Therefore, dock your compounds into your target of interest and into a (very) different protein and supply the docking results here. All compounds that received a very good antitarget-score will thus be penalized, i.e. they will have a much worse score within the output file.\n\nAs input we need:\n\
    * a file containing the compounds that are to be rescored. Supported formats are mol2, sdf or drf (DockResultFile, xml-based). Those compound should have been docket into the specified protein (i.e. the target).\n\
    * a file containing the same compounds docked into the antitarget.\n\nOutput of this tool is a file in the same format as the input ligand file containing all compounds with scores obtained by rescoring in form of a property 'antitarget_rescore'.";
	parpars.setToolManual(manual);
	parpars.setSupportedFormats("t","mol2,sdf,drf");
	parpars.setSupportedFormats("at","mol2,sdf,drf");
	parpars.setSupportedFormats("o","mol2,sdf,drf");
	parpars.parse(argc, argv);

	GenericMolFile* target = MolFileFactory::open(parpars.get("t"));
	GenericMolFile* antitarget = MolFileFactory::open(parpars.get("at"));

	String label_t = "score";
	String label_at = "score";

	/// Fetch anti-target scores and compute their median
	HashMap<String, double> antitarget_scores;
	int at_missing_scores = 0;
	for (Molecule* mol = antitarget->read(); mol; delete mol, mol = antitarget->read())
	{
		if (mol->hasProperty(label_at))
		{
			double antitarget_score = ((String)mol->getProperty(label_at).toString()).toDouble();
			String topology_hash;
			FlexibleMolecule::generateTopologyHash(mol, topology_hash, 1);
			antitarget_scores.insert(make_pair(topology_hash, antitarget_score));
		}
		else at_missing_scores++;
	}
	double median_at = calculateMedian(antitarget_scores);

	/// Fetch target scores and compute their median
	HashMap<String, double> target_scores;
	for (Molecule* mol = target->read(); mol; delete mol, mol = target->read())
	{
		if (mol->hasProperty(label_at))
		{
			double target_score = ((String)mol->getProperty(label_at).toString()).toDouble();
			String topology_hash;
			FlexibleMolecule::generateTopologyHash(mol, topology_hash, 1);
			target_scores.insert(make_pair(topology_hash, target_score));
		}
	}
	double median_t = calculateMedian(target_scores);

	/// Compute re-scores and write rescored molecules to output file
	double offset = median_t-median_at;
	int t_missing_scores = 0;
	delete target;
	target = MolFileFactory::open(parpars.get("t"));
	GenericMolFile* output = MolFileFactory::open(parpars.get("o"), ios::out, target);
	for (Molecule* mol = target->read(); mol; delete mol, mol = target->read())
	{
		String topology_hash;
		FlexibleMolecule::generateTopologyHash(mol, topology_hash, 1);

		if (mol->hasProperty(label_t))
		{
			double target_score = ((String)mol->getProperty(label_at).toString()).toDouble();

			HashMap<String, double>::iterator it = antitarget_scores.find(topology_hash);
			if (it != antitarget_scores.end())
			{
				double antitarget_score = it->second+offset;

				double p;
				if (target_score < 0) p = target_score*0.95;
				else p = target_score*1.05;
				if (antitarget_score < p)
				{
					double rescore = p+fabs(p-antitarget_score)*100;
					mol->setProperty("antitarget_rescore", rescore);
				}
				else
				{
					mol->setProperty("antitarget_rescore", target_score);
				}
			}
			else
			{
				mol->setProperty("antitarget_rescore", target_score);
			}
		}
		else t_missing_scores++;

		*output << *mol;
	}

	if (at_missing_scores > 0)
	{
		Log.level(20)<<"[Warning:] "<<at_missing_scores<<" molecules in antitarget input file did not contain score property."<<endl;
	}
	if (t_missing_scores > 0)
	{
		Log.level(20)<<"[Warning:] "<<t_missing_scores<<" molecules in target input file did not contain score property."<<endl;
	}

	target->close();
	antitarget->close();
	output->close();
	delete target;
	delete antitarget;
	delete output;
}