File: tmcd.R

package info (click to toggle)
robustbase 0.8-1-1-1
  • links: PTS
  • area: main
  • in suites: wheezy
  • size: 3,156 kB
  • sloc: fortran: 2,553; ansic: 2,419; makefile: 1
file content (98 lines) | stat: -rw-r--r-- 3,065 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
library(robustbase)

source(system.file("test_MCD.R", package = "robustbase"))#-> doMCDdata
##          ../inst/test_MCD.R

## -- now do it:
options(digits = 5)
set.seed(101) # <<-- sub-sampling algorithm now based on R's RNG and seed
doMCDdata()
##                vvvv no timing for 'R CMD Rdiff' outputs
doMCDdata(nrep = 12, time=FALSE)
doMCDdata(nrep = 12, time=FALSE, method = "MASS")

###--- now the "close to singular" mahalanobis case:
(c3 <- covMcd(mort3))
## rescale variables:
scaleV <- c(0.1, 0.1, 1, 1, .001, 0.1, 0.1, 100)
mm <- data.matrix(mort3) * rep(scaleV, each = nrow(mort3))
C3 <- covMcd(mm)
stopifnot(C3$mcd.wt == c3$mcd.wt)
try(## error: with "old default tolerance:
  covMcd(mm, control= rrcov.control(tol = 1e-10))
)

cat('Time elapsed: ', proc.time(),'\n') # for ``statistical reasons''

## "large" examples using different algo branches {seg.fault in version 0.4-4}:
set.seed(1)

n <- 600 ## - partitioning will be triggered
X <- matrix(round(100*rnorm(n * 3)), n, 3)
cX <- covMcd(X)
cX
n <- 2000 ## - nesting will be triggered
X <- matrix(round(100*rnorm(n * 3)), n, 3)
cX <- covMcd(X)
cX

cat('Time elapsed: ', proc.time(),'\n')


## Now, some small sample cases:

## maximal values:
n. <- 10
p. <-  8
set.seed(44)
(X. <- cbind(1:n., round(10*rt(n.,3)), round(10*rt(n.,2)),
             matrix(round(10*rnorm(n. * (p.-3)), 1),  nrow = n., ncol = p.-3)))

## 2 x 1 ---> Error
r <- try(covMcd(X.[1:2, 2, drop=FALSE]), silent=TRUE)
stopifnot(inherits(r, "try-error"),
          grep("too small sample size", r) == 1)

## 3 x 2 --- ditto
r <- try(covMcd(X.[1:3, 2:3]), silent=TRUE)
stopifnot(inherits(r, "try-error"),
          grep("too small sample size", r) == 1)

## 5 x 3  [ n < 2 p  ! ]  --- also works for MASS
X <- X.[1:5, 1:3]
set.seed(101)
## the finite-sample correction is definitely doubtful:
(cc <- covMcd(X, use.correction = FALSE))
str(cc) ## best = 2 3 4 5
mcc <- MASS::cov.mcd(X)
stopifnot(cc$best == mcc$best,
          all.equal(cc$center, mcc$center, tol = 1e-10),
          all.equal(c(mcc$cov / cc$raw.cov), rep(0.673549282206, 3*3)))

## p = 4 -- 6 x 4 & 7 x 4  [ n < 2 p  ! ]
p <- 4
n <- 7
X <- X.[1:n, 1+(1:p)]
stopifnot(dim(X) == c(n,p))
(cc <- covMcd(X, use.correction = FALSE))
str(cc) ## best = 1 2 4 5 6 7
mcc <- MASS::cov.mcd(X)
stopifnot(cc$best == mcc$best,
          all.equal(cc$center, mcc$center, tol = 1e-10),
          all.equal(c(mcc$cov / cc$raw.cov), rep(0.7782486992881, p*p)))
n <- 6
X <- X[1:n,]
(cc <- covMcd(X, use.correction = FALSE))
mcc <- MASS::cov.mcd(X)
stopifnot(cc$best == mcc$best,
          all.equal(cc$center, mcc$center, tol = 1e-10),
          all.equal(c(mcc$cov / cc$raw.cov), rep(0.7528695976179, p*p)))

cat('Time elapsed: ', proc.time(),'\n') # for ``statistical reasons''

## nsamp = "exact" -- here for p=7
coleman.x <- data.matrix(coleman[, 1:6])
cat('Time : ', system.time(CcX <- covMcd(coleman.x, nsamp="exact")),
    "\n")# ~ 3 sec. on a fast 2003 machine (Intel Xeon 2400 MHz)
stopifnot(all.equal(CcX$best,
                    c(2, 5:9, 11,13, 14:16, 19:20), tol=0))