File: Fingerprinter.py

package info (click to toggle)
rdkit 202009.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 129,624 kB
  • sloc: cpp: 288,030; python: 75,571; java: 6,999; ansic: 5,481; sql: 1,968; yacc: 1,842; lex: 1,254; makefile: 572; javascript: 461; xml: 229; fortran: 183; sh: 134; cs: 93
file content (71 lines) | stat: -rwxr-xr-x 2,137 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# # Automatically adapted for numpy.oldnumeric Jun 27, 2008 by -c

# $Id$
#
#  Copyright (C) 2002-2006  greg Landrum and Rational Discovery LLC
#
#   @@ All Rights Reserved @@
#  This file is part of the RDKit.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the RDKit source tree.
#
"""  EState fingerprinting

"""

import numpy
from rdkit.Chem.EState import EStateIndices
from rdkit.Chem.EState import AtomTypes


def FingerprintMol(mol):
  """ generates the EState fingerprints for the molecule

  Concept from the paper: Hall and Kier JCICS _35_ 1039-1045 (1995)

  Arguments:
    - mol: the molecule to fingerprint. It is handed to EStateIndices() and
      its GetSubstructMatches() method is called once per entry of
      AtomTypes.esPatterns.

  two numeric arrays are returned, each with one entry per pattern in
  AtomTypes.esPatterns (in the same order):
    The first (of ints) contains the number of times each possible atom type is hit
    The second (of floats) contains the sum of the EState indices for atoms of
      each type.

  """
  # build the atom-type pattern table if it has not been built yet
  if AtomTypes.esPatterns is None:
    AtomTypes.BuildPatts()
  esIndices = EStateIndices(mol)

  nPatts = len(AtomTypes.esPatterns)
  # The builtin int/float are used as dtypes: numpy.int and numpy.float were
  # mere aliases of them, deprecated in NumPy 1.20 and removed in NumPy 1.24.
  counts = numpy.zeros(nPatts, dtype=int)
  sums = numpy.zeros(nPatts, dtype=float)

  for i, (_, pattern) in enumerate(AtomTypes.esPatterns):
    matches = mol.GetSubstructMatches(pattern, uniquify=1)
    counts[i] = len(matches)
    for match in matches:
      # the first entry of each match is the index used to look up the EState value
      sums[i] += esIndices[match[0]]
  return counts, sums


def _exampleCode():
  """ Demonstrates atom typing, EState indices and fingerprints on a few SMILES

  For every example molecule this prints, in order:
    - the input SMILES and the SMILES regenerated by Chem.MolToSmiles()
    - one line per atom with its 1-based position, symbol and assigned types
    - one line per atom type with a non-zero count: name, count, summed index
    - one line per EState index with its 1-based position and value
    - a separator line

  """
  from rdkit import Chem
  for smiles in ('CC', 'CCC', 'c1[nH]cnc1CC(N)C(O)=O', 'NCCc1ccc(O)c(O)c1'):
    molecule = Chem.MolFromSmiles(smiles)
    print(smiles, Chem.MolToSmiles(molecule))

    # per-atom type assignments
    atomTypes = AtomTypes.TypeAtoms(molecule)
    for atomIdx in range(molecule.GetNumAtoms()):
      symbol = molecule.GetAtomWithIdx(atomIdx).GetSymbol()
      print('%d %4s: %s' % (atomIdx + 1, symbol, str(atomTypes[atomIdx])))

    indices = EStateIndices(molecule)
    hitCounts, indexSums = FingerprintMol(molecule)

    # fingerprint entries: atom types that were never hit are not reported
    for (typeName, _), nHits, total in zip(AtomTypes.esPatterns, hitCounts, indexSums):
      if not nHits:
        continue
      print('%6s, % 2d, % 5.4f' % (typeName, nHits, total))

    # raw EState indices, numbered from 1
    for position, value in enumerate(indices, 1):
      print('% 2d, % 5.4f' % (position, value))
    print('--------')


# Script entry point: when this file is executed directly (rather than
# imported) it runs the printing demo in _exampleCode(). The pragma keeps
# this guard out of coverage reports.
if __name__ == '__main__':  # pragma: nocover
  _exampleCode()