File: logP.C

package info (click to toggle)
ball 1.5.0%2Bgit20180813.37fc53c-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 239,888 kB
  • sloc: cpp: 326,149; ansic: 4,208; python: 2,303; yacc: 1,778; lex: 1,099; xml: 958; sh: 322; makefile: 95
file content (151 lines) | stat: -rw-r--r-- 5,849 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#include <BALL/STRUCTURE/logP.h>

using namespace std;

namespace BALL
{
	/** Default constructor.
	 *
	 *  Fills the rule table with (SMARTS pattern, contribution) pairs and
	 *  creates the SMARTS matcher used by calculate().
	 *
	 *  The rules are grouped by element. A pattern of the form
	 *  "remaining <SMARTS>" closes a group: calculate() uses it to assign a
	 *  fallback contribution to those atoms matching <SMARTS> that were not
	 *  counted by the rules registered since the previous "remaining" rule.
	 *  The order of the addRule() calls is therefore significant.
	 *
	 *  NOTE(review): the labels (C1..C27, H1..H4, N1..N14, O1..O12) and the
	 *  values look like the Wildman-Crippen atom contribution scheme --
	 *  confirm against the class documentation before changing any number.
	 */
	LogP::LogP()
	{
		// ---- carbon ----
		addRule("[$([CH4]),$([CH3]C),$([CH2](C)C)]",0.14441);  // C1
		addRule("[$([CH](C)(C)C),$([C](C)(C)(C)C)]",0.00000);  // C2
		addRule("[$([CH3][#7,#8,#15,#16,#9,Cl,Br,I]),$([CH2X4][#7,#8,#15,#16,#9,Cl,Br,I])]",-0.2035);      // C3
		addRule("[$([CH1X4][#7,#8,#15,#16,#9,Cl,Br,I]),$([CH0X4][#7,#8,#15,#16,#9,Cl,Br,I])]",-0.2051);    // C4
		addRule("[C]=[A;!#1;!#6]",-0.2783);  // C5, ali. heteroatom
		addRule("[$([CH2]=C),$([CH1](=C)A),$([CH0](=C)(A)A),$([C](=C)=C)]",0.1551);   // C6
		addRule("[CX2]#A",0.00170);  // C7
		addRule("[CH3]c",0.08452);  // C8
		addRule("[CH3][a;!#1;!#6]",-0.1444);  // C9, aro. heteroatom
		addRule("[CH2X4]a",-0.0516);  // C10
		addRule("[CHX4]a",0.1193);  // C11
		addRule("[CH0X4]a",-0.0967);  // C12
		addRule("[cH0]-[!#6;!#7;!#8;!#16;!#9;!Cl;!Br;!I]",-0.5443);  // C13
			//"[$([c][#5],$([c][#14]),$([c][#15]),$([c][#33]),$([c][#34]),$([c][#50]),$([c][#80])]"  // C13 alternative
		addRule("[c][#9]",0.0000);  // C14
		addRule("[c][#17]",0.2450);  // C15
		addRule("[c][#35]",0.1980);  // C16
		addRule("[c][#53]",0.0000);  // C17
		addRule("[cH]",0.1581);  // C18
		addRule("[c](:a)(:a):a",0.2955);  // C19
		addRule("[c](:a)(:a)-a",0.2713);  // C20
		addRule("[c](:a)(:a)-C",0.1360);  // C21
		addRule("[c](:a)(:a)-N",0.4619);  // C22
		addRule("[c](:a)(:a)-O",0.5437);  // C23
		addRule("[c](:a)(:a)-S",0.1893);  // C24
		addRule("[$([c](:a)(:a)=C),$([c](:a)(:a)=N),$([c](:a)(:a)=O)]",-0.8186);  // C25
		addRule("[$([C](=C)(a)A),$([C](=C)(c)a),$([CH](=C)a),$([C]=c)]",0.2640);  // C26
		addRule("[CX4][!#5;!#7;!#8;!#15;!#16;!#9;!Cl;!Br;!I]",0.2148);  // C27
		addRule("remaining [C,c]",0.08129);

		// ---- hydrogen ----
		addRule("[$([H,h][#6]),$([H,h][H,h])]",0.1230);  // H1, hydrocarbon
		// The third alternative excludes sulfur via "!#16" (atomic number). A bare
		// "!16" would be an isotope-mass test and would not exclude sulfur, so
		// H-O-S hydrogens would be counted here as well as by the H4 rule.
		addRule("[$([H,h]O[CX4]),$([H,h]Oc),$([H,h]O[!#6;!#7;!#8;!#16]),$([H,h][!#6;!#7;!#8])]",-0.2677);  // H2, alcohol
		addRule("[$([H,h][#7]),$([H,h]O[#7])]",0.2142);  // H3, amine
		addRule("[$([H,h]OC=[#6]),$([H,h]OC=[#7]),$([H,h]OC=O),$([H,h]OC=S),$([H,h]OO),$([H,h]OS)]",0.2980);  // H4, acid
		addRule("remaining [H,h]",0.1125);

		// ---- nitrogen ----
		addRule("[NH2+0]A",-1.0190);  // N1
		addRule("[NH+0](A)A",-0.7096);  // N2
		addRule("[NH2+0]a",-1.0270);  // N3
		addRule("[$([NH+0](A)a),$([NH+0](a)a)]",-0.5188);  // N4
		addRule("[$([NH+0]=A),$([NH+0]=a)]",0.08387);  // N5
		addRule("[$([N+0](=A)A),$([N+0](=A)a),$([N+0](=a)A),$([N+0](=a)a)]",0.1836);  // N6
		addRule("[N+0](A)(A)A",-0.3187);  // N7
		addRule("[$([N+0](a)(A)A),$([N+0](a)(a)A),$([N+0](a)(a)a)]",-0.4458);  // N8
		addRule("[N+0]#A",0.01508);  // N9
		addRule("[$([NH3+*]),$([NH2+*]),$([NH+*])]",-1.950);  // N10
		addRule("[n+0]",-0.3239);  // N11
		addRule("[n+*]",-1.119);  // N12
		addRule("[$([NH0+*](A)(A)(A)A),$([NH0+*](=A)(A)A),$([NH0+*](=A)(A)a),$([NH0+*](=[#6])=[#7])]",-0.3396);  // N13
		addRule("[$([N+*]#A),$([N-*]),$([N+*](=[N-*])=N)]",0.2887);  // N14
		addRule("remaining [N,n]",-0.4806);

		// ---- oxygen ----
		addRule("[o]",0.1552);  // O1
		addRule("[$([OH]),$([OH2])]",-0.2893);  // O2, alcohol
		addRule("[$([O](C)C),$([O](C)[A;N,O,P,S,F,Cl,Br,I]),$([O]([A;N,O,P,S,F,Cl,Br,I])[A;N,O,P,S,F,Cl,Br,I])]",-0.0684);  // O3, ali. ether
		addRule("[$([O](A)a),$([O](a)a)]",-0.4195);  // O4, aro. ether
		addRule("[$([O]=[#8]),$([O]=[#7]),$([OX1-*][#7])]",0.0335);  // O5, oxide
		addRule("[OX1-*][#16]",-0.3339);  // O6, oxide
		addRule("[OX1-*][!N;!S]",-1.189);  // O7, oxide
		addRule("[O]=c",0.1788);  // O8, aro. carbonyl
		addRule("[$([O]=[CH]C),$([O]=C(C)C),$([O]=C(C)[A;!#1;!#6]),$([O]=[CH]N),$([O]=[CH]O),$([O]=[CH2]),$([O]=[CX2]=O)]",-0.1525);  // O9, carbonyl ali.
		addRule("[$([O]=[CH]c),$([O]=C(C)c),$([O]=C(c)c),$([O]=C(c)[a;!#1;!#6]),$([O]=C(c)[A;!#1;!#6]),$([O]=C(C)[a;!#1;!#6])]",0.1129);  // O10, carbonyl aro.
		addRule("[$([O]=C([A;!#1;!#6])[A;!#1;!#6]),$([O]=C([A;!#1;!#6])[a;!#1;!#6]),$([O]=C([a;!#1;!#6])[a;!#1;!#6])]",0.4833);  // O11, carbonyl heteroatom
		addRule("[O-1]C(=O)",-1.326);  // O12, acid
		addRule("remaining [O,o]",-0.1188);

		// ---- halogens: neutral atom first, every other (charged) atom via "remaining" ----
		addRule("[#9-0]",0.4202);  // fluorine
		addRule("remaining [#9]",-2.996);

		addRule("[#17-0]",0.6895);  // chlorine
		addRule("remaining [#17]",-2.996);

		addRule("[#35-0]",0.8456);  // bromine
		addRule("remaining [#35]",-2.996);

		addRule("[#53-0]",0.8857);  // iodine
		addRule("remaining [#53]",-2.996);

		addRule("[#15]",0.8612);  // phosphorus

		addRule("[S-0]",0.6482); // aliphatic sulfur
		// Ionic sulfur covers both signs of charge; the second alternative used
		// to repeat "[S-*]", which left positively charged sulfur unmatched.
		addRule("[$([S-*]),$([S+*])]",-0.0024); // ionic sulfur
		addRule("[s]",0.6237);  // aromatic sulfur

		addRule("[B,Si,Ga,Ge,As,Se,Sn,Te,Pb,Ne,Ar,Kr,Xe,Rn]",-0.3808); // remaining p-block elements

		addRule("[Fe,Cu,Zn,Tc,Cd,Pt,Au,Hg]",-0.0025);  // remaining d-block elements

		// Allocate the matcher last: the destructor does not run when a constructor
		// throws, so an exception raised while the rule table is being filled must
		// not be able to leak this object.
		molecular_similarity_ = new MolecularSimilarity("fragments/functionalGroups.smarts");
	}


	/** Destructor.
	 *
	 *  Deletes the object referenced by molecular_similarity_, which the
	 *  constructor creates with new.
	 */
	LogP::~LogP()
	{
		 delete molecular_similarity_;
	}


	/** Append one rule to the end of the rule table.
	 *
	 *  @param smarts pattern of the rule; calculate() treats patterns that
	 *                start with "remaining" as the fallback rule of a group
	 *  @param value  contribution added per counted match of the pattern
	 */
	void LogP::addRule(String smarts, double value)
	{
		std::pair<String, double> rule(smarts, value);
		rules_.push_back(rule);
	}


	/** Sum the contributions of all rules for one molecule.
	 *
	 *  The rules are evaluated in the order in which they were added:
	 *   - an ordinary rule adds (number of matches) * (rule value) and its
	 *     match count is accumulated for the current group;
	 *   - a rule whose pattern starts with "remaining" is matched without
	 *     that prefix; if it matches more often than all rules of the
	 *     current group together, the surplus is weighted with the rule's
	 *     value. It then starts a new group by resetting the accumulated
	 *     count.
	 *
	 *  NOTE(review): matches of different rules in a group are simply summed.
	 *  If one atom is matched by several rules, the accumulated count can
	 *  exceed the atom count and the "remaining" contribution is silently
	 *  dropped.
	 *
	 *  @param usmile molecule description handed unchanged to
	 *                MolecularSimilarity::matchSmarts() -- presumably a
	 *                (unique) SMILES string, confirm against that class
	 *  @return the accumulated sum of all rule contributions
	 */
	double LogP::calculate(const String& usmile)
	{
		double logP = 0;
		// matches collected by the ordinary rules since the last "remaining" rule
		Size match_sum = 0;

		for (Size i = 0; i < rules_.size(); i++)
		{
			if (!rules_[i].first.hasPrefix("remaining"))
			{
				// Zero-initialised so that a matcher call that does not write the
				// counter cannot feed an indeterminate value into the sum.
				Size matches = 0;
				molecular_similarity_->matchSmarts(usmile, rules_[i].first, matches);
				logP += matches * rules_[i].second;
				match_sum += matches;
				continue;
			}

			// Fallback rule: strip the "remaining" marker to obtain the SMARTS
			// that counts every atom of the current group.
			String smarts = rules_[i].first.after("remaining");
			smarts.trim();

			Size no_atoms = 0;
			molecular_similarity_->matchSmarts(usmile, smarts, no_atoms);

			// Both counters are unsigned: only subtract when the result is positive.
			if (no_atoms > match_sum)
			{
				logP += (no_atoms - match_sum) * rules_[i].second;
			}

			// the next rule belongs to a new group
			match_sum = 0;
		}

		return logP;
	}
}