File: testGenerators.py

package info (click to toggle)
rdkit 202209.3-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 203,880 kB
  • sloc: cpp: 334,239; python: 80,247; ansic: 24,579; java: 7,667; sql: 2,123; yacc: 1,884; javascript: 1,358; lex: 1,260; makefile: 576; xml: 229; fortran: 183; cs: 181; sh: 101
file content (288 lines) | stat: -rw-r--r-- 13,457 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
from rdkit import Chem, DataStructs
from rdkit.Chem import rdFingerprintGenerator
import numpy as np
import unittest


class TestCase(unittest.TestCase):

  def setUp(self):
    # No shared fixtures: every test builds its own molecules and generators.
    return None

  def testAtomPairGenerator(self):
    # Atom-pair generator on 'CCC': default settings for all four output
    # types first, then one constructor option varied at a time.
    mol = Chem.MolFromSmiles('CCC')

    def sparseCountElements(gen):
      # nonzero elements of the sparse count fingerprint `gen` produces for `mol`
      return gen.GetSparseCountFingerprint(mol).GetNonzeroElements()

    # default generator: count-based outputs hold 2 distinct elements,
    # bit-vector outputs have 3 bits set
    defaultGen = rdFingerprintGenerator.GetAtomPairGenerator()
    self.assertEqual(len(sparseCountElements(defaultGen)), 2)
    self.assertEqual(len(defaultGen.GetCountFingerprint(mol).GetNonzeroElements()), 2)
    self.assertEqual(defaultGen.GetSparseFingerprint(mol).GetNumOnBits(), 3)
    self.assertEqual(defaultGen.GetFingerprint(mol).GetNumOnBits(), 3)

    # passing the atom-pair atom invariants generator explicitly
    explicitInvGen = rdFingerprintGenerator.GetAtomPairGenerator(
      atomInvariantsGenerator=rdFingerprintGenerator.GetAtomPairAtomInvGen())
    self.assertEqual(len(sparseCountElements(explicitInvGen)), 2)

    # raising minDistance or lowering maxDistance each leave a single element
    for distanceOption in ({'minDistance': 2}, {'maxDistance': 1}):
      limitedGen = rdFingerprintGenerator.GetAtomPairGenerator(**distanceOption)
      self.assertEqual(len(sparseCountElements(limitedGen)), 1)

    # with count simulation switched off the sparse bit vector has 2 bits set
    noSimulationGen = rdFingerprintGenerator.GetAtomPairGenerator(countSimulation=False)
    self.assertEqual(noSimulationGen.GetSparseFingerprint(mol).GetNumOnBits(), 2)

    # the includeChirality flag (set on both the generator and its atom
    # invariants generator) must change the set of element ids
    plainInvGen = rdFingerprintGenerator.GetAtomPairAtomInvGen(includeChirality=False)
    chiralInvGen = rdFingerprintGenerator.GetAtomPairAtomInvGen(includeChirality=True)
    plainGen = rdFingerprintGenerator.GetAtomPairGenerator(
      includeChirality=False, atomInvariantsGenerator=plainInvGen)
    chiralGen = rdFingerprintGenerator.GetAtomPairGenerator(
      includeChirality=True, atomInvariantsGenerator=chiralInvGen)
    plainIds = sparseCountElements(plainGen).keys()
    chiralIds = sparseCountElements(chiralGen).keys()
    self.assertNotEqual(plainIds, chiralIds)

  def testMorganGenerator(self):
    # Morgan generator: element counts for 'CCCC(=O)O' at radius 3, then the
    # effect of bond types on two unsanitized ring structures.
    acid = Chem.MolFromSmiles('CCCC(=O)O')

    # radius given positionally, default invariants
    gen = rdFingerprintGenerator.GetMorganGenerator(3)
    self.assertEqual(len(gen.GetSparseCountFingerprint(acid).GetNonzeroElements()), 14)

    # explicit atom invariants generators and the element count expected for each
    invariantCases = (
      (rdFingerprintGenerator.GetMorganAtomInvGen, 14),
      (rdFingerprintGenerator.GetMorganFeatureAtomInvGen, 13),
    )
    for makeInvGen, expectedCount in invariantCases:
      gen = rdFingerprintGenerator.GetMorganGenerator(radius=3,
                                                      atomInvariantsGenerator=makeInvGen())
      elements = gen.GetSparseCountFingerprint(acid).GetNonzeroElements()
      self.assertEqual(len(elements), expectedCount)

    # the same six-membered ring written with the double bonds in alternate
    # positions; parsed with sanitize=False, then given just the property
    # cache and ring information
    ringForms = []
    for smiles in ('C1=CC=CN=N1', 'C1C=CC=NN=1'):
      ring = Chem.MolFromSmiles(smiles, sanitize=False)
      ring.UpdatePropertyCache()
      Chem.GetSymmSSSR(ring)
      ringForms.append(ring)
    first, second = ringForms

    # bond types on: the two forms give different fingerprints
    typedGen = rdFingerprintGenerator.GetMorganGenerator(radius=2, useBondTypes=True)
    self.assertNotEqual(typedGen.GetSparseCountFingerprint(first),
                        typedGen.GetSparseCountFingerprint(second))

    # bond types off: the two forms give the same fingerprint
    untypedGen = rdFingerprintGenerator.GetMorganGenerator(radius=2, useBondTypes=False)
    self.assertEqual(untypedGen.GetSparseCountFingerprint(first),
                     untypedGen.GetSparseCountFingerprint(second))

    # a bond invariants generator built with useBondTypes=False must match
    # the useBondTypes=False generator on both forms
    bondInvGen = rdFingerprintGenerator.GetMorganBondInvGen(useBondTypes=False)
    viaBondInvGen = rdFingerprintGenerator.GetMorganGenerator(radius=2,
                                                              bondInvariantsGenerator=bondInvGen)
    for ring in (first, second):
      self.assertEqual(untypedGen.GetSparseCountFingerprint(ring),
                       viaBondInvGen.GetSparseCountFingerprint(ring))

  def testRDKitFPGenerator(self):
    # The default RDKit fingerprint generator must give 8 distinct nonzero
    # elements in the sparse count fingerprint of 'CCCCC'.
    pentane = Chem.MolFromSmiles('CCCCC')
    generator = rdFingerprintGenerator.GetRDKitFPGenerator()
    elements = generator.GetSparseCountFingerprint(pentane).GetNonzeroElements()
    self.assertEqual(len(elements), 8)

  def testTopologicalTorsionGenerator(self):
    # The default topological torsion generator must give a single distinct
    # nonzero element in the sparse count fingerprint of 'CCCCC'.
    pentane = Chem.MolFromSmiles('CCCCC')
    generator = rdFingerprintGenerator.GetTopologicalTorsionGenerator()
    elements = generator.GetSparseCountFingerprint(pentane).GetNonzeroElements()
    self.assertEqual(len(elements), 1)

  def testBulk(self):
    # The module-level bulk functions must return, for every molecule, the
    # same fingerprint that a generator of the matching type returns for that
    # molecule on its own. Checked for three output kinds and four
    # fingerprint types.
    mols = [Chem.MolFromSmiles(smiles) for smiles in ('CCC', 'OCCCCC', 'CCCCC')]

    # (generator factory, positional arguments, fingerprint-type constant)
    fpFamilies = (
      (rdFingerprintGenerator.GetAtomPairGenerator, (), rdFingerprintGenerator.AtomPairFP),
      (rdFingerprintGenerator.GetMorganGenerator, (2, ), rdFingerprintGenerator.MorganFP),
      (rdFingerprintGenerator.GetRDKitFPGenerator, (), rdFingerprintGenerator.RDKitFP),
      (rdFingerprintGenerator.GetTopologicalTorsionGenerator, (),
       rdFingerprintGenerator.TopologicalTorsionFP),
    )

    # (bulk function, name of the per-molecule generator method it must agree with)
    outputKinds = (
      (rdFingerprintGenerator.GetSparseCountFPs, 'GetSparseCountFingerprint'),
      (rdFingerprintGenerator.GetSparseFPs, 'GetSparseFingerprint'),
      (rdFingerprintGenerator.GetCountFPs, 'GetCountFingerprint'),
    )

    for bulkFunction, singleMethodName in outputKinds:
      for makeGenerator, makeArgs, fpType in fpFamilies:
        generator = makeGenerator(*makeArgs)
        results = bulkFunction(mols, fpType)
        fingerprintOne = getattr(generator, singleMethodName)
        # index explicitly so that a short result raises instead of being skipped
        for position, mol in enumerate(mols):
          self.assertEqual(results[position], fingerprintOne(mol))
        self.assertEqual(len(results), 3)

  def testNumBitsPerFeature(self):
    # RDKit fingerprint of 'CCCO' restricted to paths of length 1-2: 8 bits
    # are set with the default settings and 4 with numBitsPerFeature=1.
    mol = Chem.MolFromSmiles('CCCO')

    defaultGen = rdFingerprintGenerator.GetRDKitFPGenerator(minPath=1, maxPath=2)
    self.assertEqual(defaultGen.GetFingerprint(mol).GetNumOnBits(), 8)

    oneBitGen = rdFingerprintGenerator.GetRDKitFPGenerator(minPath=1, maxPath=2,
                                                           numBitsPerFeature=1)
    self.assertEqual(oneBitGen.GetFingerprint(mol).GetNumOnBits(), 4)

  def testAdditionalOutput(self):
    # Each AdditionalOutput used here has exactly one container allocated
    # before the fingerprint call. The getter for that container is compared
    # with a fixed value; every other getter must return None.
    mol = Chem.MolFromSmiles('CCO')
    generator = rdFingerprintGenerator.GetAtomPairGenerator()

    def fingerprintWith(allocatorName):
      # fresh AdditionalOutput, one allocation, one GetFingerprint call
      extra = rdFingerprintGenerator.AdditionalOutput()
      getattr(extra, allocatorName)()
      generator.GetFingerprint(mol, additionalOutput=extra)
      return extra

    # only atom counts allocated
    extra = fingerprintWith('AllocateAtomCounts')
    self.assertEqual(extra.GetAtomCounts(), (2, 2, 2))
    self.assertIsNone(extra.GetAtomToBits())
    self.assertIsNone(extra.GetBitInfoMap())
    self.assertIsNone(extra.GetBitPaths())

    # only the atom -> bits mapping allocated
    extra = fingerprintWith('AllocateAtomToBits')
    self.assertIsNone(extra.GetAtomCounts())
    self.assertEqual(extra.GetAtomToBits(), ((351, 479), (351, 399), (479, 399)))
    self.assertIsNone(extra.GetBitInfoMap())
    self.assertIsNone(extra.GetBitPaths())

    # only the bit info map allocated
    extra = fingerprintWith('AllocateBitInfoMap')
    self.assertIsNone(extra.GetAtomCounts())
    self.assertIsNone(extra.GetAtomToBits())
    self.assertEqual(extra.GetBitInfoMap(), {351: ((0, 1), ), 399: ((1, 2), ), 479: ((0, 2), )})
    self.assertIsNone(extra.GetBitPaths())

  def testCountBounds(self):
    # For every generator type, passing countBounds=(1, 8, 16, 32) together
    # with countSimulation=True must change the number of set bits relative
    # to countSimulation=True alone.
    mol = Chem.MolFromSmiles('COc1ccc(CCNC(=O)c2ccccc2C(=O)NCCc2ccc(OC)cc2)cc1')
    generatorFactories = (
      rdFingerprintGenerator.GetRDKitFPGenerator,
      rdFingerprintGenerator.GetTopologicalTorsionGenerator,
      rdFingerprintGenerator.GetMorganGenerator,
      rdFingerprintGenerator.GetAtomPairGenerator,
    )
    for makeGenerator in generatorFactories:
      defaultBoundsFp = makeGenerator(fpSize=2048, countSimulation=True).GetFingerprint(mol)
      customBoundsFp = makeGenerator(fpSize=2048, countSimulation=True,
                                     countBounds=(1, 8, 16, 32)).GetFingerprint(mol)
      self.assertNotEqual(defaultBoundsFp.GetNumOnBits(), customBoundsFp.GetNumOnBits())

  def testNumpyFingerprints(self):
    # For every generator type, the *AsNumPy methods must return the same
    # array that DataStructs.ConvertToNumpyArray fills in from the
    # corresponding fingerprint object.
    mol = Chem.MolFromSmiles('COc1ccc(CCNC(=O)c2ccccc2C(=O)NCCc2ccc(OC)cc2)cc1')
    generatorFactories = (
      rdFingerprintGenerator.GetRDKitFPGenerator,
      rdFingerprintGenerator.GetMorganGenerator,
      rdFingerprintGenerator.GetAtomPairGenerator,
      rdFingerprintGenerator.GetTopologicalTorsionGenerator,
    )
    for makeGenerator in generatorFactories:
      generator = makeGenerator(fpSize=2048)

      # bit vector, converted into an unsigned 8-bit reference array
      bitVect = generator.GetFingerprint(mol)
      reference = np.zeros((bitVect.GetNumBits(), ), dtype=np.uint8)
      DataStructs.ConvertToNumpyArray(bitVect, reference)
      direct = generator.GetFingerprintAsNumPy(mol)
      np.testing.assert_array_equal(reference, direct)

      # count vector, converted into an unsigned 32-bit reference array
      countVect = generator.GetCountFingerprint(mol)
      reference = np.zeros((countVect.GetLength(), ), dtype=np.uint32)
      DataStructs.ConvertToNumpyArray(countVect, reference)
      direct = generator.GetCountFingerprintAsNumPy(mol)
      np.testing.assert_array_equal(reference, direct)


# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()