File: FingerprintUtils.py

package info (click to toggle)
rdkit 202009.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 129,624 kB
  • sloc: cpp: 288,030; python: 75,571; java: 6,999; ansic: 5,481; sql: 1,968; yacc: 1,842; lex: 1,254; makefile: 572; javascript: 461; xml: 229; fortran: 183; sh: 134; cs: 93
file content (130 lines) | stat: -rw-r--r-- 3,728 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# $Id$
#
# Copyright (C) 2009 Greg Landrum
#  All Rights Reserved
#

import pickle
from rdkit import DataStructs, Chem
from rdkit import Chem

# Maps each similarity-method name to the DataStructs class that DepickleFP
# constructs from a stored fingerprint of that kind.
similarityMethods = {
    'RDK': DataStructs.ExplicitBitVect,
    'AtomPairs': DataStructs.IntSparseIntVect,
    'TopologicalTorsions': DataStructs.LongSparseIntVect,
    'Pharm2D': DataStructs.SparseBitVect,
    'Gobbi2D': DataStructs.SparseBitVect,
    'Morgan': DataStructs.UIntSparseIntVect,
    'Avalon': DataStructs.ExplicitBitVect,
}

# Method names, in the order they are declared above.
supportedSimilarityMethods = list(similarityMethods)


class LayeredOptions:
    """Settings and helpers for building and querying layered fingerprints.

    Every helper is a static method that reads the class-level settings
    below, so no instance is required.
    """
    # layerFlags handed to Chem.LayeredFingerprint when query is False
    loadLayerFlags = 0xFFFFFFFF
    # layerFlags handed to Chem.LayeredFingerprint when query is True
    searchLayerFlags = 0x7
    minPath = 1
    maxPath = 6
    fpSize = 1024
    # number of fingerprint bits packed into each integer word
    wordSize = 32
    nWords = fpSize // wordSize

    @staticmethod
    def GetFingerprint(mol, query=True):
        """Return ``Chem.LayeredFingerprint`` of ``mol`` using the class settings.

        ``searchLayerFlags`` is used when ``query`` is true, ``loadLayerFlags``
        otherwise.
        """
        if query:
            flags = LayeredOptions.searchLayerFlags
        else:
            flags = LayeredOptions.loadLayerFlags
        return Chem.LayeredFingerprint(mol, layerFlags=flags, minPath=LayeredOptions.minPath,
                                       maxPath=LayeredOptions.maxPath, fpSize=LayeredOptions.fpSize)

    @staticmethod
    def GetWords(mol, query=True):
        """Return the fingerprint of ``mol`` as a list of integers.

        The fingerprint's bit string is cut into consecutive chunks of
        ``wordSize`` characters and each chunk is parsed as a base-2 number.
        """
        # Use the class setting rather than a literal 32 so the chunk width
        # cannot drift away from wordSize.
        size = LayeredOptions.wordSize
        bits = LayeredOptions.GetFingerprint(mol, query=query).ToBitString()
        return [int(bits[start:start + size], 2) for start in range(0, len(bits), size)]

    @staticmethod
    def GetQueryText(mol, query=True):
        """Return a query fragment matching the set bits of ``mol``'s fingerprint.

        One ``<word>&Col_<i>=<word>`` term is produced per non-zero word, with
        ``i`` counted from 1, and the terms are joined with ``' and '``.
        Words equal to zero contribute no term; if every word is zero the
        result is an empty string.
        """
        words = LayeredOptions.GetWords(mol, query=query)
        return ' and '.join('%d&Col_%d=%d' % (word, col, word)
                            for col, word in enumerate(words, 1) if word)


def BuildSigFactory(options=None, fdefFile=None, bins=None,
                    skipFeats=('LumpedHydrophobe', 'ZnBinder')):
    """Build a Pharm2D ``SigFactory`` from a feature-definition file.

    Arguments:
      - options: optional object with an ``fdefFile`` attribute; when it is
        truthy its ``fdefFile`` replaces the ``fdefFile`` argument.
      - fdefFile: feature-definition file passed to
        ``ChemicalFeatures.BuildFeatureFactory``.
      - bins: sequence of 2-tuples handed to ``SigFactory.SetBins``; when
        None, ``[(2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 100)]``
        is used.
      - skipFeats: feature names forwarded to ``SigFactory`` as ``skipFeats``.

    Returns the configured signature factory.

    Raises ValueError if no feature-definition file is available.
    """
    if options:
        fdefFile = options.fdefFile
    if not fdefFile:
        raise ValueError('bad fdef file')
    if bins is None:
        # Build the default per call instead of sharing one mutable list
        # object between all invocations.
        bins = [(2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 100)]
    from rdkit.Chem import ChemicalFeatures
    from rdkit.Chem.Pharm2D import SigFactory
    featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile)
    sigFactory = SigFactory.SigFactory(featFactory, skipFeats=skipFeats, trianglePruneBins=False)
    sigFactory.SetBins(bins)
    return sigFactory


def BuildAtomPairFP(mol):
    """Return ``Pairs.GetAtomPairFingerprintAsIntVect(mol)``.

    The fingerprint's total value is stored on it as ``_sumCache`` before
    it is returned.
    """
    from rdkit.Chem.AtomPairs import Pairs

    pairsFp = Pairs.GetAtomPairFingerprintAsIntVect(mol)
    total = pairsFp.GetTotalVal()
    pairsFp._sumCache = total
    return pairsFp


def BuildTorsionsFP(mol):
    """Return ``Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol)``.

    The fingerprint's total value is stored on it as ``_sumCache`` before
    it is returned.
    """
    from rdkit.Chem.AtomPairs import Torsions

    torsionFp = Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol)
    total = torsionFp.GetTotalVal()
    torsionFp._sumCache = total
    return torsionFp


def BuildRDKitFP(mol):
    """Return ``Chem.RDKFingerprint`` of ``mol`` computed with ``nBitsPerHash=1``."""
    return Chem.RDKFingerprint(mol, nBitsPerHash=1)


def BuildPharm2DFP(mol):
    """Return the Pharm2D fingerprint of ``mol`` from ``Generate.Gen2DFingerprint``.

    Uses the module-level global ``sigFactory``.
    NOTE(review): ``sigFactory`` is not assigned in the visible part of this
    module; presumably the caller sets it (e.g. from BuildSigFactory) - confirm.

    If fingerprint generation raises IndexError, the molecule's SMILES is
    printed after ``FAIL:`` and the exception is re-raised.
    """
    global sigFactory
    from rdkit.Chem.Pharm2D import Generate

    try:
        return Generate.Gen2DFingerprint(mol, sigFactory)
    except IndexError:
        failedSmiles = Chem.MolToSmiles(mol, True)
        print('FAIL:', failedSmiles)
        raise


def BuildMorganFP(mol):
    """Return ``rdMolDescriptors.GetMorganFingerprint(mol, 2)``.

    The fingerprint's total value is stored on it as ``_sumCache`` before
    it is returned.
    """
    from rdkit.Chem import rdMolDescriptors

    morganFp = rdMolDescriptors.GetMorganFingerprint(mol, 2)
    total = morganFp.GetTotalVal()
    morganFp._sumCache = total
    return morganFp


def BuildAvalonFP(mol, smiles=None):
    """Return the Avalon fingerprint from ``pyAvalonTools.GetAvalonFP``.

    When ``smiles`` is given, the fingerprint is computed from it via
    ``GetAvalonFP(smiles, True)`` and ``mol`` is not used; otherwise it is
    computed from ``mol``.
    """
    from rdkit.Avalon import pyAvalonTools

    if smiles is not None:
        return pyAvalonTools.GetAvalonFP(smiles, True)
    return pyAvalonTools.GetAvalonFP(mol)


def DepickleFP(pkl, similarityMethod):
    """Rebuild a fingerprint object from its stored form.

    Arguments:
      - pkl: the stored fingerprint. ``bytes`` and ``str`` values are used
        unchanged, ``bytearray``/``memoryview`` values are copied into
        ``bytes``, and any other object is converted with ``str()``.
      - similarityMethod: key into ``similarityMethods`` selecting the class
        that is constructed from ``pkl``.

    Returns the fingerprint. If the lookup or the construction raises, the
    traceback is printed and ``pkl`` is loaded with ``pickle.loads`` instead.
    """
    if isinstance(pkl, (bytearray, memoryview)):
        # str() of a buffer-like object yields its repr text (e.g.
        # "<memory at 0x...>"), not the payload, so copy the raw bytes.
        pkl = bytes(pkl)
    elif not isinstance(pkl, (bytes, str)):
        pkl = str(pkl)
    try:
        klass = similarityMethods[similarityMethod]
        fp = klass(pkl)
    except Exception:
        import traceback
        traceback.print_exc()
        # SECURITY: pickle.loads can execute arbitrary code; only feed this
        # function data that comes from a trusted store.
        fp = pickle.loads(pkl)
    return fp