File: UnitTestCluster.py

package info (click to toggle)
rdkit 201203-3
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 37,840 kB
  • sloc: cpp: 93,902; python: 51,897; java: 5,192; ansic: 3,497; xml: 2,499; sql: 1,641; yacc: 1,518; lex: 1,076; makefile: 325; fortran: 183; sh: 153; cs: 51
file content (110 lines) | stat: -rwxr-xr-x 3,720 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# $Id: UnitTestCluster.py 1528 2010-09-26 17:04:37Z glandrum $
#
#  Copyright (C) 2001-2008  greg Landrum and Rational Discovery LLC
#
#   @@ All Rights Reserved @@
#  This file is part of the RDKit.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the RDKit source tree.
#
"""unit testing code for clustering

"""
from rdkit import RDConfig
import unittest
from rdkit.ML.Cluster import Standardize,ClusterUtils
from rdkit.ML.Cluster import Clusters
try:
  from rdkit.ML.Cluster import Murtagh
except ImportError:
  Murtagh=None
import numpy
import cPickle


class TestCase(unittest.TestCase):
  """Unit tests for the cluster tree helpers and the Murtagh clustering code.

  All checks go through ``unittest`` assertion methods instead of bare
  ``assert`` statements: ``assert`` is removed when Python runs with ``-O``,
  which would let every test pass without checking anything.
  """

  def setUp(self):
    """Build the small sample data set and its point names."""
    # this is the data set used by Romesburg in "Cluster Analysis for Researchers"
    #  to demonstrate the different clustering methods
    self.d = numpy.array([[10.,5.],[20.,20.],[30.,10.],[30.,15.],[5.,10.]])
    self.names = ['p1','p2','p3','p4','p5']

  def _assertIndices(self, clusters, expected):
    """Check that `clusters` carry exactly the indices in `expected`, in order.

    Membership is checked first so that a failure names the missing index;
    the order is checked afterwards.
    """
    indices = [x.GetIndex() for x in clusters]
    for index in expected:
      self.assertTrue(index in indices,
                      'index %d not found in %s'%(index,str(indices)))
    # we may not want to preserve order, but test it for now
    self.assertEqual(indices,expected,'bad index order')

  def testDivide(self):
    " tests the cluster division algorithms "
    # six leaves ...
    ca = Clusters.Cluster(index=1)
    cb = Clusters.Cluster(index=2)
    cc = Clusters.Cluster(index=3)
    cd = Clusters.Cluster(index=4)
    ce = Clusters.Cluster(index=5)
    cf = Clusters.Cluster(index=6)

    # ... joined into a tree whose root is c5 (index 11)
    c1 = Clusters.Cluster(metric=10,children=[ca,cb],index=7)
    c2 = Clusters.Cluster(metric=15,children=[cc,cd],index=8)
    c3 = Clusters.Cluster(metric=20,children=[ce,cf],index=9)
    c4 = Clusters.Cluster(metric=25,children=[c2,c3],index=10)
    c5 = Clusters.Cluster(metric=30,children=[c4,c1],index=11)

    cs = ClusterUtils.SplitIntoNClusters(c5,4,breadthFirst=1)
    self.assertEqual(len(cs),4,'bad split length')
    self._assertIndices(cs,[9,8,1,2])

    cs2 = ClusterUtils.SplitIntoNClusters(c5,4,breadthFirst=0)
    self.assertEqual(len(cs2),4,'bad split length')
    self._assertIndices(cs2,[8,7,5,6])

  def testMurtaghUPGMA(self):
    """ tests that Murtagh UPGMA gives the same tree for point and distance input """
    if Murtagh is None:
      # The Murtagh module is an optional import.  Report the test as skipped
      # where unittest supports it (skipTest raises); on older unittest
      # versions fall back to returning quietly as before.
      skip = getattr(self,'skipTest',None)
      if skip is not None:
        skip('Murtagh clustering module is not available')
      return
    nPts = 5
    sz = 5
    dataP = numpy.random.random((nPts,sz))
    newClust = Murtagh.ClusterData(dataP,nPts,Murtagh.UPGMA)[0]

    # squared Euclidean distance for every pair (i,j) with j<i
    ds = []
    for i in range(nPts):
      for j in range(i):
        d = dataP[i]-dataP[j]
        ds.append(sum(d*d))
    ds = numpy.array(ds)
    newClust2 = Murtagh.ClusterData(ds,nPts,Murtagh.UPGMA,isDistData=1)[0]

    self.assertEqual(len(newClust),len(newClust2),
                     'length mismatch (distance data)')
    # NOTE(review): Compare() is treated as returning a false value when the
    # two trees match, exactly as the original checks did -- confirm against
    # Clusters.Cluster.Compare.
    self.assertFalse(newClust.Compare(newClust2,ignoreExtras=0),
                     'equality failed (distance data)')

    newClust2 = Murtagh.ClusterData(dataP,nPts,Murtagh.UPGMA,isDistData=0)[0]
    self.assertEqual(len(newClust),len(newClust2),
                     'length mismatch (point data)')
    self.assertFalse(newClust.Compare(newClust2,ignoreExtras=0),
                     'equality failed (point data)')

  def testGetNodeList(self):
    """ tests the GetNodeList functionality """
    root = Clusters.Cluster(index=1)
    c1 = Clusters.Cluster(index=10)
    c1.AddChild(Clusters.Cluster(index=30))
    c1.AddChild(Clusters.Cluster(index=31))
    c1.AddChild(Clusters.Cluster(index=32))

    c2 = Clusters.Cluster(index=11)
    c2.AddChild(Clusters.Cluster(index=40))
    c2.AddChild(Clusters.Cluster(index=41))

    root.AddChild(c1)
    root.AddChild(c2)
    nodes = ClusterUtils.GetNodeList(root)

    # the expected order lists each node's children before the node itself
    indices = [x.GetIndex() for x in nodes]
    self.assertEqual(indices,[30,31,32,10,40,41,11,1],'bad indices')
# NOTE(review): profileTest is not referenced anywhere in the visible part of
# this file; presumably a leftover switch for profiling runs -- confirm
# before removing.
profileTest=0


# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()