File: test_LTS.R

package info (click to toggle)
robustbase 0.8-1-1-1
  • links: PTS
  • area: main
  • in suites: wheezy
  • size: 3,156 kB
  • sloc: fortran: 2,553; ansic: 2,419; makefile: 1
file content (121 lines) | stat: -rw-r--r-- 4,315 bytes parent folder | download | duplicates (11)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#### Utility functions for testing ltsReg()
#### -------------------------------------- ../tests/tlts.R

repLTS <- function(form, data, nrep = 1, method = c("FASTLTS","MASS"))
{
    if(method == "MASS")
        ## MASS::lqs(x,y,control=list(psamp = NA, nsamp= "best", adjust= FALSE))
        for(i in 1:nrep) MASS::lqs(form, data = data, method = "lts")
    else
        ## set mcd=FALSE - we want to time only the LTS algorithm
        for(i in 1:nrep) ltsReg(form, data = data, mcd = FALSE)
}

doLTSdata <- function(nrep = 1, time = nrep >= 3, short = time, full = !short,
		   method = c("FASTLTS", "MASS"))
{
    ##@bdescr
    ## Test the function ltsReg() on the literature datasets:
    ##
    ## Call ltsReg() for "all" regression datasets available in robustbase
    ## and print:
    ##  - execution time (if time)
    ##  - objective function
    ##  - best subsample found (if not short)
    ##  - outliers identified (with cutoff 0.975) (if not short)
    ##  - estimated coeficients and scale (if full)
    ##
    ##@edescr
    ##
    ##@in  nrep    : [integer] number of repetitions to use for estimating the
    ##                         (average) execution time
    ##@in  time    : [boolean] whether to evaluate the execution time
    ##@in  short   : [boolean] whether to do short output (i.e. only the
    ##                         objective function value). If short == FALSE,
    ##                         the best subsample and the identified outliers are
    ##                         printed. See also the parameter full below
    ##@in  full    : [boolean] whether to print the estimated coeficients and scale
    ##@in  method  : [character] select a method: one of (FASTLTS, MASS)


    dolts <- function(form, dname, dataset, nrep = 1) {

        if(missing(dataset)) {
            data(list = dname)
            dataset <- get(dname)
        } else if(missing(dname))
            dname <- deparse(substitute(dataset))
        environment(form) <- environment() ## !?!
        x <- model.matrix(form, model.frame(form, data = dataset))
        dx <- dim(x) - 0:1 # not counting intercept

        if(method == "MASS") {
            lts <- MASS::lqs(form, data = dataset, method = "lts")
            quan <- (dx[1] + (dx[2] + 1) + 1)/2 #default: (n+p+1)/2
        } else {
            lts <- ltsReg(form, data = dataset, mcd = FALSE)
            quan <- lts$quan
        }

        xres <- sprintf("%*s %3d %3d %3d %12.6f",
                        lname, dname, dx[1], dx[2], as.integer(quan), lts$crit)
        if(time) {
            xtime <- system.time(repLTS(form, data = dataset, nrep, method))[1]
            xres <- sprintf("%s %10.1f", xres, 1000 * xtime / nrep)
        }
        cat(xres, "\n")

        if(!short) {
            cat("Best subsample: \n")
            print(lts$best)

            ibad <- which(lts$lts.wt == 0)
            names(ibad) <- NULL
            nbad <- length(ibad)
            cat("Outliers: ",nbad,"\n")
            if(nbad > 0)
                print(ibad)
            if(full) {
                cat("-------------\n")
                print(lts)
                print(summary(lts))
            }
            cat("--------------------------------------------------------\n")
        }
    }

    method <- match.arg(method)

    data(heart)
    data(starsCYG)
    data(phosphor)
    data(stackloss)
    data(coleman)
    data(salinity)
    data(aircraft)
    data(delivery)
    data(wood)
    data(hbk)

    cll <- sys.call()
    cat("\nCall: ", deparse(substitute(cll)),"\n")

    cat("========================================================\n")
    cat("Data Set               n   p  Half      obj    Time [ms]\n")
    cat("========================================================\n")
    ##   1 3 5 7 9.1 3 5 7 9. 123 123
    lname <- 20 ##        --^

    dolts(clength ~ . , "heart", nrep = nrep)
    dolts(log.light ~ log.Te , "starsCYG", nrep = nrep)
    dolts(plant ~ . , "phosphor", nrep = nrep)
    dolts(stack.loss ~ . , "stackloss", nrep = nrep)
    dolts(Y ~ . , "coleman", nrep = nrep)
    dolts(Y ~ . , "salinity")
    dolts(Y ~ . , "aircraft")
    dolts(delTime ~ . , "delivery")
    dolts(y ~ . , "wood", nrep = nrep)
    dolts(Y ~ . , "hbk", nrep = nrep)

    cat("========================================================\n")
}