File: testPickers.py

package info (click to toggle)
rdkit 201203-3
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 37,840 kB
  • sloc: cpp: 93,902; python: 51,897; java: 5,192; ansic: 3,497; xml: 2,499; sql: 1,641; yacc: 1,518; lex: 1,076; makefile: 325; fortran: 183; sh: 153; cs: 51
file content (141 lines) | stat: -rwxr-xr-x 3,977 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from rdkit import RDConfig
import unittest
from rdkit.SimDivFilters import rdSimDivPickers
from rdkit.DataManip.Metric import rdMetricMatrixCalc as rdmmc
import numpy
import random

class TestCase(unittest.TestCase):
  def setUp(self) :
      self.n = 1000
      self.m = 80
      self.d = 2
      self.dataPts = numpy.zeros((self.n, self.d), 'd')
      for i in range(self.n):
        for j in range(self.d):
          self.dataPts[i,j] = random.random()
      self.dMat = rdmmc.GetEuclideanDistMat(self.dataPts)
       
  def test0MaxMin(self):
    pkr = rdSimDivPickers.MaxMinPicker()
    maxmin = pkr.Pick(self.dMat, self.n, self.m,(886,112))
    self.failUnless(maxmin[0]==886)
    self.failUnless(maxmin[1]==112)

    def func(i,j):
      if i==j:
        return 0.0
      if i<j:
        j,i=i,j
      return self.dMat[i*(i-1)/2+j]
    lmaxmin = pkr.LazyPick(func, self.n, self.m,(886,112))
    self.failUnless(list(lmaxmin)==list(maxmin))

    self.failUnlessRaises(ValueError,lambda:pkr.Pick(self.dMat, self.n, self.m,(1012,)))
    self.failUnlessRaises(ValueError,lambda:pkr.Pick(self.dMat, self.n, self.m,(-1,)))

    maxmin = pkr.Pick(self.dMat, self.n, self.m)
    self.failUnless(maxmin)
    lmaxmin = pkr.LazyPick(func, self.n, self.m)
    self.failUnless(lmaxmin)

  def test1HierarchPick(self) :
    infil = open("test_data/points.csv", 'r')
    lines = infil.readlines()
    infil.close()
    self.dataPts = numpy.zeros((len(lines), 2), 'd')
    labels = []
    i = 0
    for line in lines :
      tlst = line.strip().split(',')
      self.dataPts[i, 0] = float(tlst[1])
      self.dataPts[i, 1] = float(tlst[2])
      labels.append(int(tlst[3]))
      i += 1
    self.dMat = rdmmc.GetEuclideanDistMat(self.dataPts)
    pkr = rdSimDivPickers.HierarchicalClusterPicker(rdSimDivPickers.ClusterMethod.WARD)
    clusters = pkr.Cluster(self.dMat, i, 2)
    # check that each of the clusters have the same label
    for cl in clusters :
      clbl = labels[cl[0]]
      for id in cl:
        assert clbl == labels[id]
    hierarch = pkr.Pick(self.dMat, i, 2)
    assert tuple(hierarch) == (1,30)


  def testIssue208(self) :
    sz = 10
    N=3
    m = []
    for i in range(sz):
      for j in range(i+1,sz):
        m.append(random.random())
    m = numpy.array(m)
    picker = rdSimDivPickers.HierarchicalClusterPicker(rdSimDivPickers.ClusterMethod.WARD)
    p1 = list(picker.Pick(m,sz,N))
    p1.sort()
    p2 = list(picker.Pick(m,sz,N))
    p2.sort()
    self.failUnless(p1==p2)

  def testInts(self) :
    """ make sure we can handle ints too """
    sz = 10
    N=3
    m = []
    for i in range(sz):
      for j in range(i+1,sz):
        m.append(int(100*random.random()))
    m = numpy.array(m)
    picker = rdSimDivPickers.HierarchicalClusterPicker(rdSimDivPickers.ClusterMethod.WARD)
    p1 = list(picker.Pick(m,sz,N))
    p1.sort()
    p2 = list(picker.Pick(m,sz,N))
    p2.sort()
    self.failUnless(p1==p2)

            
  def testNonUniqueCrash(self) :
    from rdkit import DataStructs
    sz = 10
    nbits=20
    nBitsToSet=int(nbits*.3)
    N=12
    vs = []
    for i in range(sz):
      bv = DataStructs.ExplicitBitVect(nbits)
      for j in range(nBitsToSet):
        val= int(nbits*random.random())
        bv.SetBit(val)
      vs.append(bv)
      vs.append(bv)
    def taniFunc(i,j,bvs = vs):
      d = 1-DataStructs.FingerprintSimilarity(bvs[i],bvs[j])
      return d
    picker = rdSimDivPickers.MaxMinPicker()
    try:
      mm = picker.LazyPick(taniFunc,len(vs),N)
    except:
      ok=False
    else:
      ok=True
    self.failUnless(ok)
    self.failUnless(len(mm)==N)
    picker = None

    ds = []
    nvs = len(vs)
    for i in range(nvs):
      for j in range(i+1,nvs):
        d = taniFunc(i,j)
        ds.append(d)
    m = numpy.array(ds)
    picker = rdSimDivPickers.HierarchicalClusterPicker(rdSimDivPickers.ClusterMethod.WARD)
    p1 = list(picker.Pick(m,nvs,N))

            
if __name__ == '__main__':
    unittest.main()