File: bench2.py

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (52 lines) | stat: -rw-r--r-- 1,333 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from __future__ import print_function
from rdkit import Chem
from rdkit import RDConfig
import time, sys, gzip
from rdkit.six.moves import cPickle
from rdkit.RDLogger import logger
# Rebind the imported factory name to the logger instance it returns; the rest
# of the script uses `logger` as that instance.
logger = logger()

# Load the SMARTS query library. `qs` collects the query molecules and `smas`
# the SMARTS strings they were built from, kept index-aligned.
logger.info('reading smarts')
qs = []
smas = []
# `open` in a `with` block replaces the Python-2-only `file()` builtin (which is
# a NameError on Python 3) and closes the handle instead of leaking it.
with open(RDConfig.RDDataDir + '/SmartsLib/RLewis_smarts.txt', 'r') as smartsFile:
  for line in smartsFile:
    # Lines starting with '#' are skipped.
    if line[0] == '#':
      continue
    # Only the text before the first space is used as the SMARTS pattern.
    line = line.split(' ')
    p = Chem.MolFromSmarts(line[0])
    if not p:
      # Patterns for which MolFromSmarts returns a falsy value are reported on
      # stderr and left out of both lists.
      print(line[0], file=sys.stderr)
      continue
    smas.append(line[0])
    qs.append(p)

# Load the reference match counts; they are indexed below as
# refFps[molecule index][query index].
# NOTE: unpickling executes arbitrary code from the file, so only run this
# against trusted data files.
logger.info('reading target counts')
# The `with` block closes the gzip handle, which was previously left open.
with gzip.open('fps.1000.counts.pkl.gz', 'rb') as countsFile:
  refFps = cPickle.loads(countsFile.read())

# `fps` accumulates one list of per-query match counts per molecule.
fps = []
# Load the pickled molecules to search.
# NOTE: unpickling executes arbitrary code from the file, so only run this
# against trusted data files.
logger.info('reading mols:')
# The `with` block closes the gzip handle, which was previously left open.
with gzip.open('mols.1000.pkl.gz', 'rb') as molsFile:
  ms = cPickle.loads(molsFile.read())
# Timed section: match every query against every molecule, record the number
# of matches, and compare each count with the reference value.
t1 = time.time()
nFail = 0
for i, mol in enumerate(ms):
  fp = []
  for j, query in enumerate(qs):
    matches = mol.GetSubstructMatches(query)
    nMatches = len(matches)
    expected = refFps[i][j]
    if nMatches != expected:
      # Report the disagreement; the run is aborted at the tenth one.
      print('  >', i, j, matches, expected, Chem.MolToSmiles(mol), smas[j])
      nFail += 1
      if nFail == 10:
        raise ValueError
    fp.append(nMatches)
  fps.append(fp)
  # Progress message whenever the molecule index is a multiple of 50.
  if i % 50 == 0:
    logger.info('Done %d' % i)
t2 = time.time()
# Elapsed wall-clock time for the loop, in seconds.
print('%.2f' % (t2 - t1))

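# Presumably how the reference counts were produced; the loader above reads
# 'fps.1000.counts.pkl.gz', so the dumped file would need gzipping before reuse.
#cPickle.dump(fps,open('fps.1000.counts.pkl','wb+'))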
# Summary: count the fingerprints in `fps` that differ from the reference
# entry at the same index.
nFail = sum(1 for idx, counts in enumerate(fps) if counts != refFps[idx])
print('%d mismatches' % nFail)