File: Crippen.py

package info (click to toggle)
rdkit 202009.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 129,624 kB
  • sloc: cpp: 288,030; python: 75,571; java: 6,999; ansic: 5,481; sql: 1,968; yacc: 1,842; lex: 1,254; makefile: 572; javascript: 461; xml: 229; fortran: 183; sh: 134; cs: 93
file content (224 lines) | stat: -rwxr-xr-x 6,112 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# $Id$
#
# Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC
#
#   @@ All Rights Reserved @@
#  This file is part of the RDKit.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the RDKit source tree.
#
""" Atom-based calculation of LogP and MR using Crippen's approach


    Reference:
      S. A. Wildman and G. M. Crippen *JCICS* _39_ 868-873 (1999)


"""

import os
from rdkit import RDConfig
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
import numpy

_smartsPatterns = {}
_patternOrder = []
# this is the file containing the atom contributions
defaultPatternFileName = os.path.join(RDConfig.RDDataDir, 'Crippen.txt')


def _ReadPatts(fileName):
  """ *Internal Use Only*

    parses the pattern list from the data file

  """
  patts = {}
  order = []
  with open(fileName, 'r') as f:
    lines = f.readlines()
  for line in lines:
    if line[0] != '#':
      splitLine = line.split('\t')
      if len(splitLine) >= 4 and splitLine[0] != '':
        sma = splitLine[1]
        if sma != 'SMARTS':
          sma.replace('"', '')
          p = Chem.MolFromSmarts(sma)
          if p:
            if len(splitLine[0]) > 1 and splitLine[0][1] not in 'S0123456789':
              cha = splitLine[0][:2]
            else:
              cha = splitLine[0][0]
            logP = float(splitLine[2])
            if splitLine[3] != '':
              mr = float(splitLine[3])
            else:
              mr = 0.0
            if cha not in order:
              order.append(cha)
            l = patts.get(cha, [])
            l.append((sma, p, logP, mr))
            patts[cha] = l
        else:
          print('Problems parsing smarts: %s' % (sma))
  return order, patts


_GetAtomContribs = rdMolDescriptors._CalcCrippenContribs


def _pyGetAtomContribs(mol, patts=None, order=None, verbose=0, force=0):
  """ *Internal Use Only*

    calculates atomic contributions to the LogP and MR values

    if the argument *force* is not set, we'll use the molecules stored
    _crippenContribs value when possible instead of re-calculating.

  **Note:** Changes here affect the version numbers of MolLogP and MolMR
    as well as the VSA descriptors in Chem.MolSurf

  """
  if not force and hasattr(mol, '_crippenContribs'):
    return mol._crippenContribs

  if patts is None:
    patts = _smartsPatterns
    order = _patternOrder

  nAtoms = mol.GetNumAtoms()
  atomContribs = [(0., 0.)] * nAtoms
  doneAtoms = [0] * nAtoms
  nAtomsFound = 0
  done = False
  for cha in order:
    pattVect = patts[cha]
    for sma, patt, logp, mr in pattVect:
      #print('try:',entry[0])
      for match in mol.GetSubstructMatches(patt, False, False):
        firstIdx = match[0]
        if not doneAtoms[firstIdx]:
          doneAtoms[firstIdx] = 1
          atomContribs[firstIdx] = (logp, mr)
          if verbose:
            print('\tAtom %d: %s %4.4f %4.4f' % (match[0], sma, logp, mr))
          nAtomsFound += 1
          if nAtomsFound >= nAtoms:
            done = True
            break
    if done:
      break
  mol._crippenContribs = atomContribs
  return atomContribs


def _Init():
  global _smartsPatterns, _patternOrder
  if _smartsPatterns == {}:
    _patternOrder, _smartsPatterns = _ReadPatts(defaultPatternFileName)


def _pyMolLogP(inMol, patts=None, order=None, verbose=0, addHs=1):
  """ DEPRECATED
  """
  if addHs < 0:
    mol = Chem.AddHs(inMol, 1)
  elif addHs > 0:
    mol = Chem.AddHs(inMol, 0)
  else:
    mol = inMol

  if patts is None:
    global _smartsPatterns, _patternOrder
    if _smartsPatterns == {}:
      _patternOrder, _smartsPatterns = _ReadPatts(defaultPatternFileName)
    patts = _smartsPatterns
    order = _patternOrder
  atomContribs = _pyGetAtomContribs(mol, patts, order, verbose=verbose)
  return numpy.sum(atomContribs, 0)[0]


_pyMolLogP.version = "1.1.0"


def _pyMolMR(inMol, patts=None, order=None, verbose=0, addHs=1):
  """ DEPRECATED
  """
  if addHs < 0:
    mol = Chem.AddHs(inMol, 1)
  elif addHs > 0:
    mol = Chem.AddHs(inMol, 0)
  else:
    mol = inMol

  if patts is None:
    global _smartsPatterns, _patternOrder
    if _smartsPatterns == {}:
      _patternOrder, _smartsPatterns = _ReadPatts(defaultPatternFileName)
    patts = _smartsPatterns
    order = _patternOrder

  atomContribs = _pyGetAtomContribs(mol, patts, order, verbose=verbose)
  return numpy.sum(atomContribs, 0)[1]


_pyMolMR.version = "1.1.0"

MolLogP = lambda *x, **y: rdMolDescriptors.CalcCrippenDescriptors(*x, **y)[0]
MolLogP.version = rdMolDescriptors._CalcCrippenDescriptors_version
MolLogP.__doc__ = """ Wildman-Crippen LogP value

  Uses an atom-based scheme based on the values in the paper:
     S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999)

  **Arguments**

    - inMol: a molecule

    - addHs: (optional) toggles adding of Hs to the molecule for the calculation.
      If true, hydrogens will be added to the molecule and used in the calculation.

"""

MolMR = lambda *x, **y: rdMolDescriptors.CalcCrippenDescriptors(*x, **y)[1]
MolMR.version = rdMolDescriptors._CalcCrippenDescriptors_version
MolMR.__doc__ = """ Wildman-Crippen MR value

  Uses an atom-based scheme based on the values in the paper:
     S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999)

  **Arguments**

    - inMol: a molecule

    - addHs: (optional) toggles adding of Hs to the molecule for the calculation.
      If true, hydrogens will be added to the molecule and used in the calculation.

"""

if __name__ == '__main__':
  import sys

  if len(sys.argv):
    ms = []
    verbose = 0
    if '-v' in sys.argv:
      verbose = 1
      sys.argv.remove('-v')
    for smi in sys.argv[1:]:
      ms.append((smi, Chem.MolFromSmiles(smi)))

    for smi, m in ms:
      print('Mol: %s' % (smi))
      logp = MolLogP(m, verbose=verbose)
      print('----')
      mr = MolMR(m, verbose=verbose)
      print('Res:', logp, mr)
      newM = Chem.AddHs(m)
      logp = MolLogP(newM, addHs=0)
      mr = MolMR(newM, addHs=0)
      print('\t', logp, mr)
      print('-*-*-*-*-*-*-*-*')