File: connectivityBase.C

package info (click to toggle)
ball 1.5.0%2Bgit20180813.37fc53c-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 239,888 kB
  • sloc: cpp: 326,149; ansic: 4,208; python: 2,303; yacc: 1,778; lex: 1,099; xml: 958; sh: 322; makefile: 95
file content (212 lines) | stat: -rw-r--r-- 5,930 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//
//

#include <BALL/QSAR/connectivityBase.h>
#include <BALL/QSAR/simpleDescriptors.h>
#include <BALL/KERNEL/PTE.h>
#include <BALL/KERNEL/forEach.h>
#include <BALL/KERNEL/bond.h>

#include <queue>
#include <numeric>

using namespace std;

#define BALL_QSAR_CONNECTIVITYBASE_DEBUG
#undef  BALL_QSAR_CONNECTIVITYBASE_DEBUG

namespace BALL
{
	ConnectivityBase::ConnectivityBase()
		:	Descriptor()
	{
	}

	ConnectivityBase::ConnectivityBase(const ConnectivityBase& cb)
		:	Descriptor(cb)
	{
	}

	ConnectivityBase::ConnectivityBase(const String& name)
		:	Descriptor(name)
	{
	}

	ConnectivityBase::ConnectivityBase(const String& name, const String& unit)
		:	Descriptor(name, unit)
	{
	}

	ConnectivityBase& ConnectivityBase::operator = (const ConnectivityBase& cb)
	{
		this->setName(cb.getName());
		this->setUnit(cb.getUnit());
		return *this;
	}

	ConnectivityBase::~ConnectivityBase()
	{
	}
	
	bool ConnectivityBase::isValid_(AtomContainer& ac)
	{
		static HashMap<Handle, PreciseTime> mod_times;
		PreciseTime last_mod = ac.getModificationTime(); 
		Handle mol_handle = ac.getHandle();
		if (mod_times.has(mol_handle))
		{
			if (mod_times[mol_handle] == last_mod)
			{
				#ifdef BALL_QSAR_CONNECTIVITYBASE_DEBUG
				cerr << ">> ConnectivityBase::isValid: molcule valid!" << endl;
				#endif
				return true;
			}
			else
			{
				mod_times[mol_handle] = last_mod;
				#ifdef BALL_QSAR_CONNECTIVITYBASE_DEBUG
				cerr << ">> ConnectivityBase::isValid: atom container not valid, modified!" << endl; 
				#endif
				return false;
			}
		}
		else
		{
			mod_times.insert(std::make_pair(mol_handle, last_mod));
			#ifdef BALL_QSAR_CONNECTIVITYBASE_DEBUG
			cerr << ">> ConnectivityBase::isValid: atom container not valid, first call!" << endl;
			#endif
			return false;
		}
	}
	
	void ConnectivityBase::computeAllDescriptors(AtomContainer& ac)
	{	
		if (!isValid_(ac))
		{
			calculate_(ac);
		}
	}

	void ConnectivityBase::calculate_(AtomContainer& ac)
	{
		// we need aromaitcity detected and the number of heavy bonds
		NumberOfHeavyBonds nhb;
		ac.apply(nhb);

		//cerr << "ConnectivityBase::calculate_(AtomContainer& ac)" << endl;
		// iterators needed in this function
		AtomConstIterator atom_it;
		Atom::BondConstIterator bond_it;

		// hashmap to map the atoms to a size for direct matrix access
		// indices from [0-num_heavy_atoms]  are heavy atoms rest hydrogen
		HashMap<const Atom*, Size> index_map;
		Size num_heavy_atoms = 0;
		for (atom_it = ac.beginAtom(); atom_it != ac.endAtom(); ++atom_it)
		{
			if (atom_it->getElement() != PTE[Element::H])
			{
				index_map.insert(std::make_pair(&(*atom_it), num_heavy_atoms++));
			}
		}

		//cerr << "Calc dist matrix";
		vector<double> row_sums(num_heavy_atoms, 0.0);
		// do for every atom the min dists to all other atoms 
		// (single source shortest path for every single atom)
		for (atom_it = ac.beginAtom(); atom_it != ac.endAtom(); ++atom_it)
		{
			if (atom_it->getElement() != PTE[Element::H])
			{
				vector<double> dists(num_heavy_atoms, std::numeric_limits<double>::max());
				recursion_(dists, &(*atom_it), index_map);		
				// calc ro sum
				double sum(0);
				sum = accumulate(dists.begin(), dists.end(), sum);
				row_sums[index_map[&(*atom_it)]] = sum;
			}
		}	
		
		// compute Balanbans J indices
		atom_it = ac.beginAtom();
		bond_it = atom_it->beginBond();
		double value(0);
		BALL_FOREACH_BOND (ac, atom_it, bond_it)
		{
			const Atom * a1 = bond_it->getFirstAtom();
			const Atom * a2 = bond_it->getSecondAtom();
			if (a1->getElement() != PTE[Element::H] && a2->getElement() != PTE[Element::H])
			{
				value += pow(row_sums[index_map[a1]] * row_sums[index_map[a2]], -0.5);
			}
		}
		
		double balaban_j_idx(0);

		// here is not the NumberOfBonds descriptor used to keep this descriptor independend
		double num_heavy_bonds = ac.getProperty("NumberOfHeavyBonds").getDouble();
		
		double q = num_heavy_bonds;
		double mu = num_heavy_bonds-num_heavy_atoms+1;
		balaban_j_idx = q/(mu+1) * value;
		ac.setProperty("BalabanIndexJ", balaban_j_idx);
	}



	void ConnectivityBase::recursion_(vector<double>& dists, const Atom* source, HashMap<const Atom*, Size>& index_map)
	{
		//cerr << "ConnectivityBase::recursion_(...)" << endl;
		priority_queue<pair<double, const Atom*> > todo_pq;
		
		// the distances the bond order are associated with
		const double bond_value[] = { 0.0, 1.0, 0.5, 1.0/3.0, 0.0, 2.0/3.0 };

		// set the distance to source to itself to zero
		Size source_idx = index_map[source];
		dists[source_idx] = 0.0;
		
		// set the distances from the atoms next to the source atom
		for (Atom::BondConstIterator it = source->beginBond(); it != source->endBond(); it++)
		{
			if (it->getBoundAtom(*source)->getElement() != PTE[Element::H])
			{
				Size bound_idx = index_map[it->getBoundAtom(*source)];
				dists[bound_idx] = bond_value[it->getOrder()];
				todo_pq.push(std::make_pair(dists[bound_idx], it->getBoundAtom(*source)));
			}
		}
	
		// reach all nodes
		while (!todo_pq.empty())
		{
			// extract-min O(1)
			const Atom * min_atom;
			min_atom = todo_pq.top().second;
			Size min_idx = index_map[min_atom];
			todo_pq.pop();
			
			// relax O(1)
			Atom::BondConstIterator bond_it;
			for (bond_it = min_atom->beginBond(); bond_it != min_atom->endBond(); ++bond_it)
			{
				// if the bound atom is a hydrogen we don't process it
				if (bond_it->getBoundAtom(*min_atom)->getElement() != PTE[Element::H])
				{
					Size bound_idx = index_map[(bond_it->getBoundAtom(*min_atom))];
					// if distnace smaller set distance to new value, and add the atoms to the pq
					if (dists[bound_idx] > dists[min_idx] + bond_value[bond_it->getOrder()])
					{
						dists[bound_idx] = dists[min_idx] + bond_value[bond_it->getOrder()];
						todo_pq.push(std::make_pair(dists[bound_idx], bond_it->getBoundAtom(*min_atom)));
					}
				}
			}
		}
	}

} // namespace BALL