File: FragmentCatalog.py

package info (click to toggle)
rdkit 202009.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 129,624 kB
  • sloc: cpp: 288,030; python: 75,571; java: 6,999; ansic: 5,481; sql: 1,968; yacc: 1,842; lex: 1,254; makefile: 572; javascript: 461; xml: 229; fortran: 183; sh: 134; cs: 93
file content (97 lines) | stat: -rw-r--r-- 2,420 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# $Id$
#
#  Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
#
#   @@ All Rights Reserved @@
#  This file is part of the RDKit.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the RDKit source tree.
#
import sys

from rdkit import Chem
from rdkit.Chem.rdfragcatalog import *


def message(msg, dest=sys.stdout):
  """Write _msg_ to _dest_ exactly as given.

  Nothing is appended to the text (no newline) and nothing is returned.

  Arguments:
    - msg: the text to emit
    - dest: any object providing a write() method.  The default is the
      sys.stdout object that was bound when this function was defined.
  """
  emit = dest.write
  emit(msg)


class BitGainsInfo(object):
  """Simple record holding the data known about a single bit.

  The values defined at class level are defaults; code that fills in a
  record assigns the real values on the instance.
  """
  # identifier of the bit; -1 until a real id has been assigned
  id = -1
  # text description of the bit (empty when none is available)
  description = ''
  # gain value associated with the bit
  gain = 0.0
  # per-class counts for the bit; None until populated
  nPerClass = None


def ProcessGainsFile(fileName, nToDo=-1, delim=',', haveDescriptions=1):
  """Read bit-gain records from a delimited text file.

  The first line of the file is treated as a header and skipped.  Every
  following non-blank line is expected to hold, in this order:
    the bit id (int), optionally a description, the gain (float) and
    then any number of per-class counts (ints).
  Leading/trailing whitespace around each field is ignored.

  Arguments:
    - fileName: path of the file to read
    - nToDo: stop as soon as this many records have been collected.
      The default of -1 (like 0) never matches, so the whole file is read.
    - delim: the field separator
    - haveDescriptions: if true, the second column is taken to be the
      bit description

  Returns: a list of BitGainsInfo instances, in file order.

  Raises: ValueError if a numeric field cannot be converted, IndexError
    if a data line has too few fields.
  """
  res = []
  # the context manager closes the file even if parsing fails or we stop early
  with open(fileName, 'r') as inFile:
    # iterate the file object directly; file.xreadlines() is Python 2 only
    for lineNo, line in enumerate(inFile):
      if lineNo == 0:
        # header line
        continue
      if not line.strip():
        # blank line (e.g. trailing newline at end of file): nothing to parse
        continue
      splitL = [x.strip() for x in line.split(delim)]
      bit = BitGainsInfo()
      bit.id = int(splitL[0])
      col = 1
      if haveDescriptions:
        bit.description = splitL[col]
        col += 1
      bit.gain = float(splitL[col])
      col += 1
      # everything after the gain is a per-class count
      bit.nPerClass = [int(entry) for entry in splitL[col:]]
      res.append(bit)
      if len(res) == nToDo:
        break
  return res


def BuildAdjacencyList(catalog, bits, limitInclusion=1, orderLevels=0):
  """Collect the down-links between bits and group bit ids by order.

  Arguments:
    - catalog: the catalog to query.  Only its GetBitEntryId(),
      GetEntryBitId(), GetEntryOrder() and GetEntryDownIds() methods
      are used.
    - bits: sequence of objects exposing the bit id as an .id attribute
    - limitInclusion: if true, down-links to bits that are not present
      in _bits_ are ignored
    - orderLevels: if true, every level is returned as a list sorted by
      decreasing number of down-links (ties: decreasing bit id, so the
      ids must be mutually comparable) instead of as a set

  Returns: a 2-tuple (adjs, levels)
    - adjs: dict mapping each bit id from _bits_ to the list of bit ids
      it links down to
    - levels: dict mapping an order (as reported by
      catalog.GetEntryOrder()) to the bit ids seen at that order: a set,
      or a sorted list when _orderLevels_ is true
  """
  bitIds = [bit.id for bit in bits]
  # set for O(1) membership tests; bitIds keeps the caller's ordering
  knownIds = set(bitIds)
  adjs = {}
  levels = {}
  for bitId in bitIds:
    entry = catalog.GetBitEntryId(bitId)
    levels.setdefault(catalog.GetEntryOrder(entry), set()).add(bitId)
    downBits = []
    for down in catalog.GetEntryDownIds(entry):
      downBit = catalog.GetEntryBitId(down)
      if not limitInclusion or downBit in knownIds:
        downBits.append(downBit)
        levels.setdefault(catalog.GetEntryOrder(down), set()).add(downBit)
    adjs[bitId] = downBits

  if orderLevels:
    # Sort the ids in each level by the number of down-links they have,
    # largest first.  Ids that only showed up as a down-link (possible
    # when limitInclusion is off) have no entry in adjs and count as 0.
    def _sortKey(bitId):
      return len(adjs.get(bitId, ())), bitId

    levels = {order: sorted(ids, key=_sortKey, reverse=True)
              for order, ids in levels.items()}
  return adjs, levels


def GetMolsMatchingBit(mols, bit, fps):
  """Return the molecules whose fingerprint has a particular bit set.

  Arguments:
    - mols: the candidate molecules
    - bit: either a BitGainsInfo instance (its .id is used) or the bit
      id itself
    - fps: fingerprints indexable by position; fps[i] is looked up for
      mols[i] and must itself be indexable by the bit id

  Returns: a list with the members of _mols_ for which
    fps[i][bitId] is true, in their original order.
  """
  bitId = bit.id if isinstance(bit, BitGainsInfo) else bit
  return [mol for idx, mol in enumerate(mols) if fps[idx][bitId]]