File: determinMCD.R

package info (click to toggle)
robustbase 0.99-4-1-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 4,552 kB
  • sloc: fortran: 3,245; ansic: 3,243; sh: 15; makefile: 2
file content (134 lines) | stat: -rw-r--r-- 4,561 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
library(robustbase)

source(system.file("xtraR/test_MCD.R", package = "robustbase"))#-> doMCDdata()

##' This version of domcd() runs *both* "Fast" and "deterministic" MCD
##' @title covMcd() "workhorse" function -- *passed* to and from  doMCDdata()
##' @param x data set: n x p numeric matrix
##' @param xname "promise" which will be substituted() and printed
##' @param nrep number of repetition: only sensible for *timing*
##' @param time
##' @param short
##' @param full
##' @param lname optional:
##' @param seed  optional:
##' @param trace optional:
domcd.2 <- function(x, xname, nrep=1,
                    do.exact = NULL, # <- smart default, globally customizable
                    time   = get("time",   parent.frame()), # compromise
                    short  = get("short",  parent.frame()), # compromise
                    full   = get("full",   parent.frame()), # compromise
                    lname=20, seed=123, trace=FALSE)
{
    if(short && full)
	stop("you should not set both 'full' and 'short' to TRUE")
    force(xname)# => evaluate when it is a data(<>, ..) call
    n <- dim(x)[1]
    p <- dim(x)[2]
    metha <- "FastMCD"
    methb <- "detMCD"
    if(is.null(do.exact)) {
        nLarge <- if(exists("nLarge", mode="numeric"))
                      get("nLarge", mode="numeric") else 5000
        do.exact <- choose(n, p+1L) < nLarge
    }
    set.seed(seed); mcda <- covMcd(x, trace=trace)
    set.seed(seed); mcdb <- covMcd(x, nsamp="deterministic", trace=trace)
    if(do.exact) {
	methX <- "exactMCD"
        set.seed(seed); mcdX <- covMcd(x, nsamp="exact", trace=trace)
    }
    mkRes <- function(mcd)
	sprintf("%3d %3d %3d %12.6f\n", n,p, mcd$quan, mcd$crit)
    xresa <- mkRes(mcda)
    xresb <- mkRes(mcdb)
    if(do.exact) xresX <- mkRes(mcdX)
    if(time) {
        tim1 <- function(meth)
            sprintf("%10.3f\n", system.time(repMCD(x, nrep, meth))[1]/nrep)
        xresa <- paste(xresa, tim1(metha))
        xresb <- paste(xresb, tim1(methb))
        if(do.exact) xresX <- paste(xresX, tim1(methX))
    }
    if(full) {
	header <- get("header", parent.frame())
	header(time)
    }
    ## lname: must fit to header():
    x.meth <- paste(xname, format(c(metha, methb, if(do.exact) methX)))
    cat(sprintf("%*s", lname, x.meth[1]), xresa)
    cat(sprintf("%*s", lname, x.meth[2]), xresb)
    if(do.exact) cat(sprintf("%*s", lname, x.meth[3]), xresX)
    cat("Best subsamples: \n")
    cat(sprintf(" %10s: ", metha)); print(mcda$best)
    if(identical(mcdb$best, mcda$best))
	cat(sprintf(" %s is the same as %s\n", methb, metha))
    else {
	cat(sprintf(" %10s: ", methb)); print(mcdb$best)
	cat(sprintf(" Difference  %s - %s:", methb, metha))
	print(setdiff(mcdb$best, mcda$best))
    }
    if(do.exact) {
	if(identical(mcda$best, mcdX$best))
	    cat(sprintf(" %s is the same as %s\n", methX, metha))
	else if(identical(mcdb$best, mcdX$best))
	    cat(sprintf(" %s is the same as %s\n", methX, methb))
	else {
	    cat(sprintf(" %10s: ", methX)); print(mcdX$best)
        }
    }
    if(!short) {
        cat("Details about", metha,": ")
        ibad <- which(mcda$wt==0)
        names(ibad) <- NULL
        nbad <- length(ibad)
        cat("Outliers: ",nbad,"\n")
        if(nbad > 0)
            print(ibad)
        if(full){
            cat("-------------\n")
            print(mcda)
        }
        cat("--------------------------------------------------------\n")
    }
}

doMCDdata(domcd = domcd.2)
warnings() ## in one example  n < 2 * p ..

###' Test the exact fit property of CovMcd --------------------------------

##' generate "exact fit" data
d.exact <- function(seed=seed, p=2) {
    stopifnot(p >= 1)
    set.seed(seed)
    n1 <- 45
    x1 <- matrix(rnorm(p*n1), nrow=n1, ncol=p)
    x1[,p] <- x1[,p] + 3
    n2 <- 55
    m2 <- 3
    x <- rbind(x1, cbind(matrix(rnorm((p-1)*n2), n2, p-1), rep(m2,n2)))
    colnames(x) <- paste0("X", 1:p)
    x
}

plot(d.exact(18, p=2))
pairs(d.exact(1234, p=3), gap=0.1)

for(p in c(2,4))
for(sid in c(2, 4, 18, 1234)) {
    cat("\nseed = ",sid,"; p = ",p,":\n")
    d.x <- d.exact(sid, p=p)
    d2 <- covMcd(d.x)
    ## Gave error {for p=2, seeds 2, 4, 18 also on 64-bit}:
    ## At line 729 of file rffastmcd.f
    ## Fortran runtime error: Index '6' of dimension 1 of array 'z' above upper bound of 4
    print(d2)
    if(FALSE) ## FIXME fails when calling eigen() in "r6pack()"
        d2. <- covMcd(d.x, nsamp = "deterministic", scalefn = Qn)
    stopifnot(d2$singularity$kind == "on.hyperplane")
}


## TODO: also get examples of other singularity$kind's