File: Matcher.py

package info (click to toggle)
rdkit 202009.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 129,624 kB
  • sloc: cpp: 288,030; python: 75,571; java: 6,999; ansic: 5,481; sql: 1,968; yacc: 1,842; lex: 1,254; makefile: 572; javascript: 461; xml: 229; fortran: 183; sh: 134; cs: 93
file content (147 lines) | stat: -rwxr-xr-x 4,042 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#
# Copyright (C) 2003-2008 greg Landrum and Rational Discovery LLC
#
#   @@ All Rights Reserved @@
#  This file is part of the RDKit.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the RDKit source tree.
#
""" functionality for finding pharmacophore matches in molecules


  See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way
  pharmacophores are broken into triangles and labelled.

  See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit
  numbering

"""
from rdkit import Chem
from rdkit.Chem.Pharm2D import Utils


class MatchError(Exception):
  pass


_verbose = 0


def GetAtomsMatchingBit(sigFactory, bitIdx, mol, dMat=None, justOne=0, matchingAtoms=None):
  """ Returns a list of lists of atom indices for a bit

    **Arguments**

      - sigFactory: a SigFactory

      - bitIdx: the bit to be queried

      - mol: the molecule to be examined

      - dMat: (optional) the distance matrix of the molecule

      - justOne: (optional) if this is nonzero, only the first match
        will be returned.

      - matchingAtoms: (optional) if this is nonzero, it should
        contain a sequence of sequences with the indices of atoms in
        the molecule which match each of the patterns used by the
        signature.

    **Returns**

      a list of tuples with the matching atoms
  """
  assert sigFactory.shortestPathsOnly, 'not implemented for non-shortest path signatures'
  nPts, featCombo, scaffold = sigFactory.GetBitInfo(bitIdx)
  if _verbose:
    print('info:', nPts)
    print('\t', featCombo)
    print('\t', scaffold)

  if matchingAtoms is None:
    matchingAtoms = sigFactory.GetMolFeats(mol)

  # find the atoms that match each features
  # fams = sigFactory.GetFeatFamilies()
  choices = []
  for featIdx in featCombo:
    tmp = matchingAtoms[featIdx]
    if tmp:
      choices.append(tmp)
    else:
      # one of the patterns didn't find a match, we
      #  can return now
      if _verbose:
        print('no match found for feature:', featIdx)
      return []

  if _verbose:
    print('choices:')
    print(choices)

  if dMat is None:
    dMat = Chem.GetDistanceMatrix(mol, sigFactory.includeBondOrder)

  distsToCheck = Utils.nPointDistDict[nPts]

  protoPharmacophores = Utils.GetAllCombinations(choices, noDups=1)

  res = []
  for protoPharm in protoPharmacophores:
    if _verbose:
      print('protoPharm:', protoPharm)
    for i in range(len(distsToCheck)):
      dLow, dHigh = sigFactory.GetBins()[scaffold[i]]
      a1, a2 = distsToCheck[i]
      #
      # FIX: this is making all kinds of assumptions about
      #  things being single-atom matches (or at least that
      #  only the first atom matters
      #
      idx1, idx2 = protoPharm[a1][0], protoPharm[a2][0]
      dist = dMat[idx1, idx2]
      if _verbose:
        print('\t dist: %d->%d = %d (%d,%d)' % (idx1, idx2, dist, dLow, dHigh))
      if dist < dLow or dist >= dHigh:
        break
    else:
      if _verbose:
        print('Found one')
      # we found it
      protoPharm.sort()
      protoPharm = tuple(protoPharm)
      if protoPharm not in res:
        res.append(protoPharm)
        if justOne:
          break
  return res


def _exampleCode():
  import os
  from rdkit import RDConfig
  from rdkit.Chem import ChemicalFeatures
  from rdkit.Chem.Pharm2D import SigFactory, Generate

  fdefFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', 'BaseFeatures.fdef')
  featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile)
  factory = SigFactory.SigFactory(featFactory)
  factory.SetBins([(1, 2), (2, 5), (5, 8)])
  factory.Init()

  mol = Chem.MolFromSmiles('OCC(=O)CCCN')
  sig = Generate.Gen2DFingerprint(mol, factory)
  print('onbits:', list(sig.GetOnBits()))

  _verbose = 0
  for bit in sig.GetOnBits():
    atoms = GetAtomsMatchingBit(factory, bit, mol)
    print('\tBit %d: ' % (bit), atoms)

  print('finished')


if __name__ == '__main__':  # pragma: nocover
  _exampleCode()