File: bench2.py

package info (click to toggle)
rdkit 201203-3
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 37,840 kB
  • sloc: cpp: 93,902; python: 51,897; java: 5,192; ansic: 3,497; xml: 2,499; sql: 1,641; yacc: 1,518; lex: 1,076; makefile: 325; fortran: 183; sh: 153; cs: 51
file content (50 lines) | stat: -rw-r--r-- 1,298 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from rdkit import Chem
from rdkit import RDConfig
import time,cPickle,sys,gzip
from rdkit.RDLogger import logger
# Rebind the imported ``logger`` factory name to the logger instance it
# returns; everything below logs through this object.
logger = logger()

# Load the SMARTS query library.  ``qs`` collects the query molecules built
# by Chem.MolFromSmarts and ``smas`` the SMARTS strings they were built from;
# the two lists are index-aligned so a query can be reported by its text.
logger.info('reading smarts')
qs = []
smas = []
# Open the library in a ``with`` block so the handle is closed as soon as it
# has been read (it used to stay open until interpreter exit), and iterate the
# file lazily instead of materialising every line with readlines().
with open(RDConfig.RDDataDir+'/SmartsLib/RLewis_smarts.txt','r') as smartsFile:
    for line in smartsFile:
        # lines starting with '#' are skipped
        if line.startswith('#'):
            continue
        # the SMARTS is whatever precedes the first space on the line
        sma = line.split(' ')[0]
        patt = Chem.MolFromSmarts(sma)
        if not patt:
            # no usable query came back: report the SMARTS on stderr, skip it
            print >>sys.stderr,sma
            continue
        smas.append(sma)
        qs.append(patt)

# Load the two gzipped pickle fixtures from the current working directory.
# NOTE(review): unpickling can execute arbitrary code -- only run this
# against trusted, locally generated fixture files.
logger.info('reading target counts')
# Reference match counts, later indexed as refFps[molecule][query].
# The gzip handle is closed explicitly (it used to be leaked); try/finally is
# used instead of ``with`` because GzipFile only supports the with statement
# from Python 2.7 on.
inF = gzip.open('fps.1000.counts.pkl.gz','rb')
try:
    refFps = cPickle.loads(inF.read())
finally:
    inF.close()

# computed match counts, one row per molecule, filled in by the timed loop
fps = []
logger.info('reading mols:')
inF = gzip.open('mols.1000.pkl.gz','rb')
try:
    ms = cPickle.loads(inF.read())
finally:
    inF.close()
t1 = time.time()
nFail=0
for i,m in enumerate(ms):
    fp = [0]*len(qs)
    for j,q in enumerate(qs):
        o=m.GetSubstructMatches(q)
        if len(o)!=refFps[i][j]:
            print '  >',i,j,o,refFps[i][j],Chem.MolToSmiles(m),smas[j]
            nFail += 1
            if nFail==10:
                raise ValueError
        fp[j]=len(o)
    fps.append(fp)
    if not i%50:
        logger.info('Done %d'%i)
t2 = time.time()
print '%.2f'%(t2-t1)

#cPickle.dump(fps,file('fps.1000.counts.pkl','wb+'))
nFail=0
for i,fp in enumerate(fps):
    if fp!=refFps[i]:
        nFail+=1
print '%d mismatches'%nFail