File: clustering_comparisons.py

package info (click to toggle)
nipy 0.1.2%2B20100526-2
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 11,992 kB
  • ctags: 13,434
  • sloc: python: 47,720; ansic: 41,334; makefile: 197
file content (48 lines) | stat: -rw-r--r-- 1,402 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
"""
Simple demo that partitions a smooth field into 10 clusters.
In this case, Ward's clustering behaves best, by far.

Author: Bertrand Thirion, 2009
"""

import numpy as np
import numpy.random as nr
import nipy.neurospin.graph.field as ff


# Grid dimensions of the simulated field (a single dx-by-dy slice, since
# dz is 1) and the number of clusters requested from each algorithm.
dx = 50
dy = 50
dz = 1
nbseeds = 10
nvoxels = dx * dy * dz

F = ff.Field(nvoxels)
# One row of integer (x, y, z) grid coordinates per voxel.  The builtin `int`
# is used instead of `np.int`: that name was only an alias of `int`, was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
xyz = np.reshape(np.indices((dx, dy, dz)), (3, nvoxels)).T.astype(int)
# NOTE(review): 18 is presumably the 3D neighbourhood system (18-connectivity)
# used to link grid points -- confirm against Field.from_3d_grid.
F.from_3d_grid(xyz, 18)

# Alternative input kept from the original script: noisy squared distance to
# the grid centre instead of smoothed white noise.
#data = 3*nr.randn(dx*dy*dz) + np.sum((xyz-xyz.mean(0))**2,1)
#F.set_field(np.reshape(data,(dx*dy*dz,1)))

# Start from white noise (one column, one value per voxel), rescale the edge
# weights by 1/18, then run the field's diffusion.
# NOTE(review): the rescaling and the 5 passed to diffusion() presumably make
# this 5 smoothing iterations that yield the "smooth field" of the module
# docstring -- confirm against Field.diffusion.
data = nr.randn(nvoxels, 1)
F.set_weights(F.get_weights() / 18)
F.set_field(data)
F.diffusion(5)
data = F.get_field()

# Draw `nbseeds` distinct random indices below F.V by taking the head of a
# random permutation (argsort of uniform noise).
seeds = np.argsort(nr.rand(F.V))[:nbseeds]

# 1. geodesic k-means started from the random seeds
seeds, label, J0 = F.geodesic_kmeans(seeds)
# 2. Ward's clustering into the same number of clusters
wlabel, J1 = F.ward(nbseeds)
# 3. geodesic k-means again, this time initialised with a copy of the Ward
#    labelling; `label` from step 1 is overwritten here.
seeds, label, J2 = F.geodesic_kmeans(seeds, label=wlabel.copy(), eps=1.e-7)

# A single pre-formatted string prints the same text under both the Python 2
# print statement and the Python 3 print function.
print('inertia values for the 3 algorithms:  %s %s %s' % (J0, J1, J2))

import matplotlib.pylab as mp
# Show the input field and the two label maps side by side in one figure.
# Each entry pairs a per-voxel array with its panel title.  The cluster count
# in the titles comes from `nbseeds` so they stay correct if it is changed.
# `label` is the result of the Ward-initialised geodesic k-means run.
panels = [
    (data, 'Input data'),
    (wlabel, 'Ward clustering \n into %d components' % nbseeds),
    (label, 'geodesic kmeans clust. \n into %d components' % nbseeds),
]

mp.figure()
for column, (values, caption) in enumerate(panels):
    mp.subplot(1, 3, column + 1)
    # Fold the flat per-voxel array back onto the (dx, dy) grid; this relies
    # on dz being 1 so that the array holds exactly dx*dy values.
    mp.imshow(np.reshape(values, (dx, dy)), interpolation='nearest')
    mp.title(caption)
mp.show()