File: testCorrMatGen.py

package info (click to toggle)
rdkit 201203-3
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 37,840 kB
  • sloc: cpp: 93,902; python: 51,897; java: 5,192; ansic: 3,497; xml: 2,499; sql: 1,641; yacc: 1,518; lex: 1,076; makefile: 325; fortran: 183; sh: 153; cs: 51
file content (107 lines) | stat: -rwxr-xr-x 3,376 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
## Automatically adapted for numpy.oldnumeric Jun 27, 2008 by -c

from rdkit import RDConfig
import unittest
from rdkit.ML.InfoTheory import rdInfoTheory
try:
  from rdkit.ML.InfoTheory import BitClusterer
except ImportError:
  BitClusterer=None
from rdkit.ML.Data import DataUtils
from rdkit import DataStructs
import random

def getValLTM(i, j, mat):
    """Return the (i, j) entry of a symmetric matrix stored as a flat lower triangle.

    The storage layout implied by the index arithmetic is the strictly
    lower triangle laid out row by row without the diagonal: for
    ``row > col`` the value lives at ``row*(row-1)//2 + col``.

    Arguments:
      - i, j: integer indices; their order does not matter
      - mat: flat, integer-indexable container of the off-diagonal values

    Returns:
      the stored value for the pair, or 0.0 when ``i == j`` (diagonal
      entries are not stored).
    """
    if i > j:
        row, col = i, j
    elif j > i:
        row, col = j, i
    else:
        return 0.0
    # Floor division keeps the offset an int: with "/" Python 3 would produce
    # a float here, which cannot be used as a sequence index.  row*(row-1) is
    # always even, so no precision is lost.
    return mat[row * (row - 1) // 2 + col]
        
class TestCase(unittest.TestCase):
    """Tests for rdInfoTheory.BitCorrMatGenerator and BitClusterer.

    Every fingerprint built in setUp() has its second half identical to its
    first half, so bit i and bit (nbits/2 + i) are always set together.
    """

    def setUp(self):
        # Here is what we are going to do to test this out:
        # - generate bit vectors of length nbits
        # - turn on a fraction of the first nbits/2 bits at random
        # - for each bit i turned on in the range (0, nbits/2) turn on the bit
        #   nbits/2 + i
        # - basically the first half of a fingerprint is the same as the
        #   second half of the fingerprint
        # - if we repeat this process often enough we should see strong
        #   correlation between the bits i (i < nbits/2) and (nbits/2 + i)

        # presumably seeds the random number generator so that the shuffles
        # below are reproducible -- confirm against DataUtils
        DataUtils.InitRandomNumbers((100, 23))
        self.nbits = 200
        self.d = 40
        self.nfp = 1000

        # a real list, not a lazy range object, is handed to the wrappers
        self.blist = list(range(self.nbits))
        half = self.nbits // 2

        self.fps = []
        for _ in range(self.nfp):
            fp = DataStructs.ExplicitBitVect(self.nbits)
            # random.shuffle() works in place and needs a mutable sequence
            obits = list(range(half))
            random.shuffle(obits)
            for bit in obits[:self.d]:
                fp.SetBit(bit)
                fp.SetBit(bit + half)
            self.fps.append(fp)

    def _buildCorrMat(self):
        """Feed every fingerprint to a BitCorrMatGenerator and return its matrix."""
        cmg = rdInfoTheory.BitCorrMatGenerator()
        cmg.SetBitList(self.blist)
        for fp in self.fps:
            cmg.CollectVotes(fp)
        return cmg.GetCorrMatrix()

    def test0CorrMat(self):
        """Compare mirrored-bit and neighbouring-bit averages with stored values."""
        corrMat = self._buildCorrMat()
        half = self.nbits // 2

        avr = 0.0   # running sum over the mirrored pairs (i, i + nbits/2)
        navr = 0.0  # running sum over the neighbouring pairs (i, i + 1)
        for i in range(half):
            avr += getValLTM(i, i + half, corrMat)
            navr += getValLTM(i, i + 1, corrMat)

        self.assertEqual(2 * avr / self.nbits, 400.0)
        # NOTE(review): this constant looks tied to the exact sequence produced
        # by the seeded shuffles in setUp() -- confirm before changing either.
        self.assertEqual(2 * navr / self.nbits, 158.3)

    def test1Cluster(self):
        """Check that clustering pairs each bit with its mirror in the second half."""
        if BitClusterer is None:
            # report the missing optional module instead of passing silently
            self.skipTest('BitClusterer is not available')
        corrMat = self._buildCorrMat()
        half = self.nbits // 2
        quarter = self.nbits // 4

        bcl = BitClusterer.BitClusterer(self.blist, half)
        bcl.ClusterBits(corrMat)
        cls = bcl.GetClusters()
        for cl in cls:
            self.assertEqual(len(cl), 2)
            self.assertEqual(cl[0] + half, cl[1])

        # probe fingerprint: the first quarter of the bits plus their mirrors
        tfp = DataStructs.ExplicitBitVect(self.nbits)
        obits = list(range(0, quarter)) + list(range(half, 3 * self.nbits // 4))
        tfp.SetBitsFromList(obits)

        rvc = bcl.MapToClusterScores(tfp)
        self.assertEqual(len(rvc), half)
        for i in range(half):
            if i < quarter:
                self.assertEqual(rvc[i], 2)
            else:
                self.assertEqual(rvc[i], 0)

        nfp = bcl.MapToClusterFP(tfp)
        self.assertEqual(len(nfp), half)
        for i in range(half):
            if i < quarter:
                self.assertTrue(nfp[i])
            else:
                self.assertFalse(nfp[i])
               
# Run the tests defined in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()