1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
from rdkit import RDConfig
import unittest
from rdkit.SimDivFilters import rdSimDivPickers
from rdkit.DataManip.Metric import rdMetricMatrixCalc as rdmmc
import numpy
import random
class TestCase(unittest.TestCase):
    """Exercise the MaxMin and hierarchical-cluster pickers in rdSimDivPickers."""

    def setUp(self):
        """Build a random 2-D point cloud and its Euclidean distance matrix."""
        self.n = 1000  # number of points in the pool
        self.m = 80    # number of points to pick
        self.d = 2     # dimensionality of each point
        # Row-major fill, so random.random() is drawn in the same order as an
        # explicit nested loop over (i, j).
        self.dataPts = numpy.array(
            [[random.random() for _ in range(self.d)] for _ in range(self.n)],
            'd')
        self.dMat = rdmmc.GetEuclideanDistMat(self.dataPts)

    def test0MaxMin(self):
        """Pick and LazyPick must agree and must honour the seed picks."""
        pkr = rdSimDivPickers.MaxMinPicker()
        maxmin = pkr.Pick(self.dMat, self.n, self.m, (886, 112))
        self.assertEqual(maxmin[0], 886)
        self.assertEqual(maxmin[1], 112)

        def func(i, j):
            """Look up the distance between points i and j in self.dMat."""
            if i == j:
                return 0.0
            if i < j:
                j, i = i, j
            # self.dMat is indexed here as a flattened lower triangle.  Floor
            # division keeps the index an int on Python 3 as well as Python 2.
            return self.dMat[i * (i - 1) // 2 + j]

        lmaxmin = pkr.LazyPick(func, self.n, self.m, (886, 112))
        self.assertEqual(list(lmaxmin), list(maxmin))

        # Seed indices outside [0, n) must be rejected.
        self.assertRaises(ValueError, pkr.Pick,
                          self.dMat, self.n, self.m, (1012,))
        self.assertRaises(ValueError, pkr.Pick,
                          self.dMat, self.n, self.m, (-1,))

        # Without seeds both entry points should still return something.
        maxmin = pkr.Pick(self.dMat, self.n, self.m)
        self.assertTrue(maxmin)
        lmaxmin = pkr.LazyPick(func, self.n, self.m)
        self.assertTrue(lmaxmin)

    def test1HierarchPick(self):
        """Ward clustering of the labelled CSV points must respect the labels."""
        # The context manager closes the file even if reading fails.
        with open("test_data/points.csv", 'r') as infil:
            lines = infil.readlines()
        nPts = len(lines)
        self.dataPts = numpy.zeros((nPts, 2), 'd')
        labels = []
        for row, line in enumerate(lines):
            # Columns 1 and 2 hold the coordinates, column 3 the class label.
            tlst = line.strip().split(',')
            self.dataPts[row, 0] = float(tlst[1])
            self.dataPts[row, 1] = float(tlst[2])
            labels.append(int(tlst[3]))
        self.dMat = rdmmc.GetEuclideanDistMat(self.dataPts)
        pkr = rdSimDivPickers.HierarchicalClusterPicker(
            rdSimDivPickers.ClusterMethod.WARD)
        clusters = pkr.Cluster(self.dMat, nPts, 2)
        # check that every member of a cluster carries the same label
        for cl in clusters:
            clbl = labels[cl[0]]
            for idx in cl:
                self.assertEqual(clbl, labels[idx])
        hierarch = pkr.Pick(self.dMat, nPts, 2)
        self.assertEqual(tuple(hierarch), (1, 30))

    def testIssue208(self):
        """Picking twice from the same float matrix must give the same picks."""
        sz = 10
        N = 3
        # One random distance per unordered pair (i, j) with i < j.
        m = numpy.array([random.random()
                         for i in range(sz) for j in range(i + 1, sz)])
        picker = rdSimDivPickers.HierarchicalClusterPicker(
            rdSimDivPickers.ClusterMethod.WARD)
        p1 = sorted(picker.Pick(m, sz, N))
        p2 = sorted(picker.Pick(m, sz, N))
        self.assertEqual(p1, p2)

    def testInts(self):
        """ make sure we can handle ints too """
        sz = 10
        N = 3
        # Same layout as testIssue208, but with integer-valued distances.
        m = numpy.array([int(100 * random.random())
                         for i in range(sz) for j in range(i + 1, sz)])
        picker = rdSimDivPickers.HierarchicalClusterPicker(
            rdSimDivPickers.ClusterMethod.WARD)
        p1 = sorted(picker.Pick(m, sz, N))
        p2 = sorted(picker.Pick(m, sz, N))
        self.assertEqual(p1, p2)

    def testNonUniqueCrash(self):
        """Pickers must cope with a pool that contains duplicate entries."""
        from rdkit import DataStructs
        sz = 10
        nbits = 20
        nBitsToSet = int(nbits * .3)
        N = 12
        vs = []
        for i in range(sz):
            bv = DataStructs.ExplicitBitVect(nbits)
            for j in range(nBitsToSet):
                val = int(nbits * random.random())
                bv.SetBit(val)
            # Each vector is stored twice, so the pool has only sz distinct
            # entries while N > sz picks are requested.
            vs.append(bv)
            vs.append(bv)

        def taniFunc(i, j, bvs=vs):
            """Return 1 - FingerprintSimilarity for vectors i and j."""
            return 1 - DataStructs.FingerprintSimilarity(bvs[i], bvs[j])

        picker = rdSimDivPickers.MaxMinPicker()
        try:
            mm = picker.LazyPick(taniFunc, len(vs), N)
        except Exception as exc:
            # Report the real cause instead of a bare "ok is False".
            self.fail('LazyPick raised on non-unique input: %r' % (exc,))
        self.assertEqual(len(mm), N)

        # Drop the MaxMin picker before building the hierarchical one.
        picker = None
        nvs = len(vs)
        m = numpy.array([taniFunc(i, j)
                         for i in range(nvs) for j in range(i + 1, nvs)])
        picker = rdSimDivPickers.HierarchicalClusterPicker(
            rdSimDivPickers.ClusterMethod.WARD)
        # Only checking that this call completes; the picks are not inspected.
        list(picker.Pick(m, nvs, N))
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|