File: pam.R

package info (click to toggle)
cluster 2.1.8.2-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 1,732 kB
sloc: ansic: 3,397; sh: 20; makefile: 2
file content (227 lines) | stat: -rw-r--r-- 9,703 bytes
parent folder | download | duplicates (4)
library(cluster)
## Compare on these:
nms <- c("clustering", "objective", "isolation", "clusinfo", "silinfo")
nm2 <- c("medoids", "id.med", nms)
nm3 <- nm2[- pmatch("obj", nm2)]

(x <- x0 <- cbind(V1 = (-3:4)^2, V2 = c(0:6,NA), V3 = c(1,2,NA,7,NA,8:9,8)))
(px <- pam(x,2, metric="manhattan"))
stopifnot(identical(x,x0))# DUP=FALSE ..
pd <-  pam(dist(x,"manhattan"), 2)
px2 <- pam(x,2, metric="manhattan", keep.diss=FALSE, keep.data=FALSE)
pdC <- pam(x,2, metric="manhattan", cluster.only = TRUE)
p1  <- pam(x,1, metric="manhattan")

stopifnot(identical(px[nms], pd[nms]),
	  identical(px[nms], px2[nms]),
	  identical(pdC, px2$clustering),
	  ## and for default dist "euclidean":
	  identical(pam(x,	2)[nms],
		    pam(dist(x),2)[nms]),
	  identical(p1[c("id.med", "objective", "clusinfo")],
		    list(id.med = 6L, objective = c(build=9.25, swap=9.25),
			 clusinfo = array(c(8, 18, 9.25, 45, 0), dim = c(1, 5),
			 dimnames=list(NULL, c("size", "max_diss", "av_diss",
			 "diameter", "separation"))))),
	  p1$clustering == 1, is.null(p1$silinfo)
	  )

set.seed(253)
## generate 250 objects, divided into 2 clusters.
x <- rbind(cbind(rnorm(120, 0,8), rnorm(120, 0,8)),
	   cbind(rnorm(130,50,8), rnorm(130,10,8)))

.proctime00 <- proc.time()

summary(px2 <- pam(x, 2))
pdx <- pam(dist(x), 2)
all.equal(px2[nms], pdx[nms], tol = 1e-12) ## TRUE
pdxK <- pam(dist(x), 2, keep.diss = TRUE)
stopifnot(identical(pdx[nm2], pdxK[nm2]))

spdx <- silhouette(pdx)
summary(spdx)
spdx
postscript("pam-tst.ps")
if(FALSE)
    plot(spdx)# the silhouette
## is now identical :
plot(pdx)# failed in 1.7.0 -- now only does silhouette

par(mfrow = 2:1)
## new 'dist' argument for clusplot():
plot(pdx, dist=dist(x))
## but this should work automagically (via eval()) as well:
plot(pdx)
## or this
clusplot(pdx)

data(ruspini)
summary(pr4 <- pam(ruspini, 4))
(pr3 <- pam(ruspini, 3))
(pr5 <- pam(ruspini, 5))

data(votes.repub)
summary(pv3 <- pam(votes.repub, 3))
(pv4 <- pam(votes.repub, 4))
(pv6 <- pam(votes.repub, 6, trace = 3))

cat('Time elapsed: ', proc.time() - .proctime00,'\n')

## re-starting with medoids from pv6  shouldn't change:
pv6. <- pam(votes.repub, 6, medoids = pv6$id.med, trace = 3)
identical(pv6[nm3], pv6.[nm3])

## This example seg.faulted at some point:
d.st <- data.frame(V1= c(9, 12, 12, 15, 9, 9, 13, 11, 15, 10, 13, 13,
		       13, 15,  8, 13, 13, 10, 7, 9, 6, 11, 3),
		   V2= c(5, 9, 3, 5, 1, 1, 2, NA, 10, 1, 4, 7,
		       4, NA, NA, 5, 2, 4, 3, 3, 6, 1, 1),
		   V3 = c(63, 41, 59, 50, 290, 226, 60, 36, 32, 121, 70, 51,
		       79, 32, 42, 39, 76, 60, 56, 88, 57, 309, 254),
		   V4 = c(146, 43, 78, 88, 314, 149, 78, NA, 238, 153, 159, 222,
		       203, NA, NA, 74, 100, 111, 9, 180, 50, 256, 107))
dd <- daisy(d.st, stand = TRUE)
(r0 <- pam(dd, 5))# cluster 5 = { 23 } -- on single observation
## pam doing the "daisy" computation internally:
r0s <- pam(d.st, 5, stand=TRUE, keep.diss=FALSE, keep.data=FALSE)
(ii <- which(names(r0) %in% c("call","medoids")))
stopifnot(all.equal(r0[-ii], r0s[-ii], tol=1e-14),
          identical(r0s$medoids, data.matrix(d.st)[r0$medoids, ]))

## This gave only 3 different medoids -> and seg.fault:
(r5 <- pam(dd, 5, medoids = c(1,3,20,2,5), trace = 2)) # now "fine"

dev.off()

##------------------------ Testing pam() with new "pamonce" argument:

## This is from "next version of Matrix" test-tools-1.R:
showSys.time <- function(expr) {
    ## prepend 'Time' for R CMD Rdiff
    st <- system.time(expr)
    writeLines(paste("Time", capture.output(print(st))))
    invisible(st)
}
show6Ratios <- function(...) {
    stopifnot(length(rgs <- list(...)) == 6,
              nchar(ns <- names(rgs)) > 0)
    r <- round(cbind(..1, ..2, ..3, ..4, ..5, ..6)[c(1,5),], 5)
    dimnames(r) <- list(paste("Time ", rownames(r)), ns)
    r
}


n <- 1000
## If not enough cases, all CPU times equals 0.
n <- 500 # for now, and automatic testing

sd <- 0.5
set.seed(13)
n2 <- as.integer(round(n * 1.5))
x <- rbind(cbind(rnorm( n,0,sd), rnorm( n,0,sd)),
           cbind(rnorm(n2,5,sd), rnorm(n2,5,sd)),
           cbind(rnorm(n2,7,sd), rnorm(n2,7,sd)),
           cbind(rnorm(n2,9,sd), rnorm(n2,9,sd)))


## original algorithm
st0 <- showSys.time(pamx      <- pam(x, 4,               trace.lev=2))# 8.157   0.024   8.233
 ## bswapPamOnce  algorithm
st1 <- showSys.time(pamxonce  <- pam(x, 4, pamonce=TRUE, trace.lev=2))# 6.122   0.024   6.181
## bswapPamOnceDistIndice
st2 <- showSys.time(pamxonce2 <- pam(x, 4, pamonce = 2,  trace.lev=2))# 4.101   0.024   4.151
## bswapPamSchubert FastPAM1
st3 <- showSys.time(pamxonce3 <- pam(x, 4, pamonce = 3,  trace.lev=2))#
## bswapPamSchubert FastPAM2
st4 <- showSys.time(pamxonce4 <- pam(x, 4, pamonce = 4,  trace.lev=2))#
## bswapPamSchubert FastPAM2 with linearized memory access
st5 <- showSys.time(pamxonce5 <- pam(x, 4, pamonce = 5,  trace.lev=2))#
## bswapPamSchubert FasterPAM
st6 <- showSys.time(pamxonce6 <- pam(x, 4, pamonce = 6,  trace.lev=2))#
show6Ratios('6:orig' = st6/st0, '5:orig' = st5/st0, '4:orig' = st4/st0, '3:orig' = st3/st0, '2:orig' = st2/st0, '1:orig' = st1/st0)

## only call element is not equal
(icall <- which(names(pamx) == "call"))
pamx[[icall]]
stopifnot(all.equal(pamx    [-icall],  pamxonce [-icall]),
	  all.equal(pamxonce[-icall],  pamxonce2[-icall]),
	  all.equal(pamxonce[-icall],  pamxonce3[-icall]),
	  all.equal(pamxonce[-icall],  pamxonce4[-icall]),
	  all.equal(pamxonce[-icall],  pamxonce5[-icall]),
	  all.equal(pamxonce[-icall],  pamxonce6[-icall]))

## Same using specified medoids
(med0 <- 1 + round(n* c(0,1, 2.5, 4)))#                                      lynne (~ 2010, AMD Phenom II X4 925)
st0 <- showSys.time(pamxst      <- pam(x, 4, medoids = med0,               trace.lev=2))#  13.071   0.024  13.177
st1 <- showSys.time(pamxoncest  <- pam(x, 4, medoids = med0, pamonce=TRUE, trace.lev=2))#   8.503   0.024   8.578
st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2,    trace.lev=2))#   5.587   0.025   5.647
st3 <- showSys.time(pamxonce3st <- pam(x, 4, medoids = med0, pamonce=3,    trace.lev=2))#
st4 <- showSys.time(pamxonce4st <- pam(x, 4, medoids = med0, pamonce=4,    trace.lev=2))#
st5 <- showSys.time(pamxonce5st <- pam(x, 4, medoids = med0, pamonce=5,    trace.lev=2))#
st6 <- showSys.time(pamxonce6st <- pam(x, 4, medoids = med0, pamonce=6,    trace.lev=2))#
show6Ratios('6:orig' = st6/st0, '5:orig' = st5/st0, '4:orig' = st4/st0, '3:orig' = st3/st0, '2:orig' = st2/st0, '1:orig' = st1/st0)

## only call element is not equal
stopifnot(all.equal(pamxst    [-icall], pamxoncest [-icall]),
          all.equal(pamxoncest[-icall], pamxonce2st[-icall]),
          all.equal(pamxoncest[-icall], pamxonce3st[-icall]),
          all.equal(pamxoncest[-icall], pamxonce4st[-icall]),
          all.equal(pamxoncest[-icall], pamxonce5st[-icall]),
          all.equal(pamxoncest[-icall], pamxonce6st[-icall]))

## Different starting values
med0 <- 1:4 #                                                               lynne (~ 2010, AMD Phenom II X4 925)
st0 <- showSys.time(pamxst      <- pam(x, 4, medoids = med0,               trace.lev=2))# 13.416   0.023  13.529
st1 <- showSys.time(pamxoncest  <- pam(x, 4, medoids = med0, pamonce=TRUE, trace.lev=2))#  8.384   0.024   8.459
st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2,    trace.lev=2))#  5.455   0.030   5.520
st3 <- showSys.time(pamxonce3st <- pam(x, 4, medoids = med0, pamonce=3,    trace.lev=2))#
st4 <- showSys.time(pamxonce4st <- pam(x, 4, medoids = med0, pamonce=4,    trace.lev=2))#
st5 <- showSys.time(pamxonce5st <- pam(x, 4, medoids = med0, pamonce=5,    trace.lev=2))#
st6 <- showSys.time(pamxonce6st <- pam(x, 4, medoids = med0, pamonce=6,    trace.lev=2))#
show6Ratios('6:orig' = st6/st0, '5:orig' = st5/st0, '4:orig' = st4/st0, '3:orig' = st3/st0, '2:orig' = st2/st0, '1:orig' = st1/st0)

## only call element is not equal
stopifnot(all.equal(pamxst    [-icall], pamxoncest [-icall]),
          all.equal(pamxoncest[-icall], pamxonce2st[-icall]),
          all.equal(pamxoncest[-icall], pamxonce3st[-icall]),
          all.equal(pamxoncest[-icall], pamxonce4st[-icall]),
          all.equal(pamxoncest[-icall], pamxonce5st[-icall]),
          all.equal(pamxoncest[-icall], pamxonce6st[-icall]))


## Medoid bug  --- MM: Fixed, well "0L+ hack", in my pam.q, on 2012-01-31
## ----------
med0 <- (1:6)
st0 <- showSys.time(pamxst   <- pam(x, 6, medoids = med0 ,            trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st1 <- showSys.time(pamxst.1 <- pam(x, 6, medoids = med0 , pamonce=1, trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st2 <- showSys.time(pamxst.2 <- pam(x, 6, medoids = med0 , pamonce=2, trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st3 <- showSys.time(pamxst.3 <- pam(x, 6, medoids = med0 , pamonce=3, trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st4 <- showSys.time(pamxst.4 <- pam(x, 6, medoids = med0 , pamonce=4, trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st5 <- showSys.time(pamxst.5 <- pam(x, 6, medoids = med0 , pamonce=5, trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st6 <- showSys.time(pamxst.6 <- pam(x, 6, medoids = med0 , pamonce=6, trace.lev=2))
stopifnot(identical(med0, 1:6))
stopifnot(all.equal(pamxst[-icall], pamxst.1 [-icall]),
          all.equal(pamxst[-icall], pamxst.2 [-icall]),
          all.equal(pamxst[-icall], pamxst.3 [-icall]),
          all.equal(pamxst[-icall], pamxst.4 [-icall]),
          all.equal(pamxst[-icall], pamxst.5 [-icall]))
# FasterPAM finds a better solution here, by chance
stopifnot(pamxst$objective >= pamxst.6$objective)


## Last Line:
cat('Time elapsed: ', proc.time() - .proctime00,'\n')