# File: testFPB.py
#
# Debian package info:
#   package: rdkit 201809.1+dfsg-6
#   links: PTS, VCS
#   area: main
#   in suites: buster
#   size: 123,688 kB
#   sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739;
#         lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
# File content: 273 lines | stat: -rw-r--r-- 11,861 bytes
from rdkit import DataStructs
from rdkit import RDConfig
import unittest, os


def feq(a, b, tol=1e-4):
  """Return True when `a` and `b` differ by strictly less than `tol`."""
  difference = abs(a - b)
  return difference < tol


class TestCase(unittest.TestCase):
  """Tests for the Python wrappers of FPBReader and MultiFPBReader.

  All fingerprint files are read from Code/DataStructs/testData below
  RDConfig.RDBaseDir.  The expected values used by more than one test are kept
  in the class-level tables below so that the single-threaded, multi-threaded
  and init-on-search variants are all checked against the same data.
  """

  # Files combined by the MultiFPBReader tests, in the order they are added.
  _MULTI_FILES = (
    "zinc_random200.1.patt.fpb",
    "zinc_random200.2.patt.fpb",
    "zinc_random200.3.patt.fpb",
    "zinc_random200.4.patt.fpb",
  )

  # FPS (hex) text of the query used for the multi-reader Tanimoto searches.
  _TANI_QUERY_FPS = ("0000000000404000100000001000040000300040222000002004000240000020000000"
                     "8200010200000090000024040860070044003214820000220401054008018000226000"
                     "4800800140000042000080008008020482400000200410800000300430200800400000"
                     "0000080a0000800400010c800200648818100010880040")

  # Expected result of the Tanimoto search at threshold=0.6: one entry per
  # neighbor, giving the expected values of nbr[0], nbr[1] and nbr[2].
  _TANI_EXPECTED = (
    (0.66412, 0, 3),
    (0.65289, 1, 2),
    (0.64341, 2, 1),
    (0.61940, 1, 0),
    (0.61905, 0, 0),
    (0.61344, 0, 1),
  )

  # FPS (hex) text of the query used for the multi-reader containment searches.
  _CONTAINS_QUERY_FPS = ("40081010824820021000500010110410003000402b20285000a4040240010030050000"
                         "080001420040009000003d04086007080c03b31d920004220400074008098010206080"
                         "00488001080000c64002a00080000200024c2000602410049200340820200002400010"
                         "02200106090401056801080182006088101000088a0048")

  # Expected result of the containment search: one entry per neighbor, giving
  # the expected values of nbr[0] and nbr[1].
  _CONTAINS_EXPECTED = (
    (160, 0),
    (163, 0),
    (170, 0),
    (180, 2),
    (182, 3),
    (185, 0),
    (189, 0),
    (192, 3),
    (193, 0),
  )

  def setUp(self):
    """Open and initialize an FPBReader on zim.head100.fpb for each test."""
    self.dirname = os.path.join(RDConfig.RDBaseDir, 'Code', 'DataStructs', 'testData')
    self.filename = os.path.join(self.dirname, 'zim.head100.fpb')
    self.fpbr = DataStructs.FPBReader(self.filename)
    self.fpbr.Init()

  # ------------------------------------------------------------------
  # helpers (names do not start with "test", so unittest ignores them)
  # ------------------------------------------------------------------
  @staticmethod
  def _fpsToBytes(fps):
    """Convert FPS (hex) text to the binary text form passed to the searches."""
    return DataStructs.BitVectToBinaryText(DataStructs.CreateFromFPSText(fps))

  def _makeMultiReader(self, **kwargs):
    """Build a MultiFPBReader over the files in _MULTI_FILES.

    Keyword arguments are forwarded to the MultiFPBReader constructor.  Each
    AddReader() call is checked to return the running number of readers.  The
    reader is returned without Init() having been called, so the caller decides
    whether to initialize it explicitly.
    """
    mfpbr = DataStructs.MultiFPBReader(**kwargs)
    for count, fname in enumerate(self._MULTI_FILES, 1):
      reader = DataStructs.FPBReader(os.path.join(self.dirname, fname))
      self.assertEqual(mfpbr.AddReader(reader), count)
    return mfpbr

  def _assertBestHitIsSelf(self, hits, nHits):
    """Check the number of hits and that the top hit is entry 0 with score 1."""
    self.assertEqual(len(hits), nHits)
    self.assertEqual(hits[0][1], 0)
    self.assertAlmostEqual(hits[0][0], 1., 4)

  def _assertTanimotoNbrs(self, nbrs):
    """Compare a multi-reader Tanimoto result against _TANI_EXPECTED."""
    self.assertEqual(len(nbrs), len(self._TANI_EXPECTED))
    for i, expected in enumerate(self._TANI_EXPECTED):
      self.assertAlmostEqual(nbrs[i][0], expected[0], 4)
      self.assertEqual(nbrs[i][1], expected[1])
      self.assertEqual(nbrs[i][2], expected[2])

  def _assertContainingNbrs(self, nbrs):
    """Compare a multi-reader containment result against _CONTAINS_EXPECTED."""
    self.assertEqual(len(nbrs), len(self._CONTAINS_EXPECTED))
    for i, expected in enumerate(self._CONTAINS_EXPECTED):
      self.assertEqual(nbrs[i][0], expected[0])
      self.assertEqual(nbrs[i][1], expected[1])

  # ------------------------------------------------------------------
  # tests
  # ------------------------------------------------------------------
  def test1Basics(self):
    """Size, bit count, ids, fingerprint retrieval and operator[]."""
    self.assertEqual(len(self.fpbr), 100)
    self.assertEqual(self.fpbr.GetNumBits(), 2048)
    self.assertEqual(self.fpbr.GetId(0), "ZINC00902219")
    self.assertEqual(self.fpbr.GetId(3), "ZINC04803506")

    fp = self.fpbr.GetFP(0)
    self.assertEqual(fp.GetNumBits(), 2048)
    self.assertEqual(fp.GetNumOnBits(), 17)
    expectedOnBits = (1, 80, 183, 222, 227, 231, 482, 650, 807, 811, 831, 888, 1335, 1411, 1664,
                      1820, 1917)
    self.assertEqual(expectedOnBits, tuple(fp.GetOnBits()))

    # test operator[]
    fp, nm = self.fpbr[0]
    self.assertEqual(nm, "ZINC00902219")
    self.assertEqual(fp.GetNumOnBits(), 17)

  def test2Tanimoto(self):
    """Tanimoto similarity and neighbor search with fingerprint 0 as query."""
    bv = self.fpbr.GetBytes(0)
    self.assertAlmostEqual(self.fpbr.GetTanimoto(0, bv), 1.0, 4)
    self.assertAlmostEqual(self.fpbr.GetTanimoto(1, bv), 0.3704, 4)

    # without an explicit threshold only the query itself is expected back
    self._assertBestHitIsSelf(self.fpbr.GetTanimotoNeighbors(bv), 1)

    hits = self.fpbr.GetTanimotoNeighbors(bv, threshold=0.3)
    self._assertBestHitIsSelf(hits, 5)
    self.assertEqual(hits[1][1], 1)
    self.assertAlmostEqual(hits[1][0], 0.3704, 4)

  def test3Tversky(self):
    """Tversky similarity and neighbor search with both weights set to 1.

    The expected values are the same as those used in test2Tanimoto.
    """
    bv = self.fpbr.GetBytes(0)
    self.assertAlmostEqual(self.fpbr.GetTversky(0, bv, 1, 1), 1.0, 4)
    self.assertAlmostEqual(self.fpbr.GetTversky(1, bv, 1, 1), 0.3704, 4)

    # without an explicit threshold only the query itself is expected back
    self._assertBestHitIsSelf(self.fpbr.GetTverskyNeighbors(bv, 1, 1), 1)

    hits = self.fpbr.GetTverskyNeighbors(bv, 1, 1, threshold=0.3)
    self._assertBestHitIsSelf(hits, 5)
    self.assertEqual(hits[1][1], 1)
    self.assertAlmostEqual(hits[1][0], 0.3704, 4)

  def test4Contains(self):
    """GetContainingNeighbors() with fingerprints 0 and 1 as queries."""
    bv = self.fpbr.GetBytes(0)
    nbrs = self.fpbr.GetContainingNeighbors(bv)
    self.assertEqual(len(nbrs), 1)
    self.assertEqual(nbrs[0], 0)

    bv = self.fpbr.GetBytes(1)
    nbrs = self.fpbr.GetContainingNeighbors(bv)
    self.assertEqual(len(nbrs), 4)
    self.assertEqual(nbrs, (1, 2, 3, 4))

  def test5Contains(self):
    """An example based on substructure screening."""
    filename = os.path.join(self.dirname, 'zinc_all_clean.100.patt1k.fpb')
    fpbr = DataStructs.FPBReader(filename)
    fpbr.Init()
    # query fingerprint in binary form (named so as not to shadow the builtin `bytes`)
    query = b'\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000\x00@\x00 \x00\x00 \x00\x00\x02@\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00`\x07\x00\x04\x00"\x14\x02\x00\x00"\x00\x00\x00\x00\x08\x00\x80\x00\x00@\x00@\x00\x80\x00\x00\x00\x00B\x00\x00\x80\x00\x80\x08\x00\x04\x00@\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00  \x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x80\x04\x00\x00\x0c\x00\x00\x00@\x88\x10\x10\x00\x00\x88\x00@'
    nbrs = fpbr.GetContainingNeighbors(query)
    self.assertEqual(len(nbrs), 9)
    ids = sorted(fpbr.GetId(x) for x in nbrs)
    self.assertEqual(ids, ['ZINC00000562', 'ZINC00000843', 'ZINC00000969', 'ZINC00001484',
                           'ZINC00001585', 'ZINC00002094', 'ZINC00004739', 'ZINC00005235',
                           'ZINC00006300'])

  def test6MultiFPBReaderTani(self):
    """Tanimoto neighbor search across four readers, with and without threads."""
    mfpbr = self._makeMultiReader()
    mfpbr.Init()
    self.assertEqual(mfpbr.GetNumBits(), 1024)
    self.assertEqual(len(mfpbr), 4)

    query = self._fpsToBytes(self._TANI_QUERY_FPS)
    self._assertTanimotoNbrs(mfpbr.GetTanimotoNeighbors(query, threshold=0.6))

    # test multi-threaded (won't do anything if the RDKit isn't compiled with threads support)
    self._assertTanimotoNbrs(mfpbr.GetTanimotoNeighbors(query, threshold=0.6, numThreads=4))

  def test7MultiFPBReaderContains(self):
    """Containment search across four readers, with and without threads."""
    mfpbr = self._makeMultiReader()
    mfpbr.Init()
    self.assertEqual(mfpbr.GetNumBits(), 1024)
    self.assertEqual(len(mfpbr), 4)

    query = self._fpsToBytes(self._CONTAINS_QUERY_FPS)
    self._assertContainingNbrs(mfpbr.GetContainingNeighbors(query))
    self._assertContainingNbrs(mfpbr.GetContainingNeighbors(query, numThreads=4))

  def test8MultiFPBReaderContainsInitOnSearch(self):
    """Containment search on a reader built with initOnSearch=True.

    Init() is deliberately not called here; the same results as in
    test7MultiFPBReaderContains are expected.
    """
    mfpbr = self._makeMultiReader(initOnSearch=True)

    query = self._fpsToBytes(self._CONTAINS_QUERY_FPS)
    self._assertContainingNbrs(mfpbr.GetContainingNeighbors(query, numThreads=4))

  def test9MultiFPBReaderEdges(self):
    """Edge case: searching a MultiFPBReader that has no readers added."""
    mfpbr = DataStructs.MultiFPBReader()
    mfpbr.Init()

    query = self._fpsToBytes(self._TANI_QUERY_FPS)
    nbrs = mfpbr.GetTanimotoNeighbors(query, threshold=0.6)
    self.assertEqual(len(nbrs), 0)


# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()