File: ncomp.R

package info (click to toggle)
r-cran-pls 2.7-3-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,016 kB
  • sloc: sh: 13; makefile: 2
file content (80 lines) | stat: -rw-r--r-- 3,324 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
randomiz.test <- function(residualsNew, residualsReference, nperm) {
    d <- residualsNew^2 - residualsReference^2
    md <- mean(d)
    N <- length(d)

    signs <- round(matrix(runif(N * nperm), N, nperm)) * 2 - 1
    dsigns <- d * signs
    mdsigns <- colMeans(dsigns)

    count <- sum(mdsigns >= md) ## equality will never occur, right?
    (count + .5) / (nperm + 1)
}

## Starting from the minimum in the CV results do formal randomization
## tests for the models with fewer components or apply the one-sigma
## criterion. This is done on the validation data. At this moment only
## for univariate Y, and for the "CV" estimate.
selectNcomp <- function(object,
                        method = c("randomization", "onesigma"),
                        nperm = 999, alpha = 0.01, ncomp = object$ncomp,
                        plot = FALSE, ...) {
    if (!isTRUE(object$validation$method == "CV"))
        stop("No cross-validation data available in model")
    ## check that Y is univariate
    if (dim(residuals(object))[2] > 1)
        stop("Only univariate response supported")

    rmseps <- c(RMSEP(object, "CV")$val) ## includes zero
    maxIdx <- ncomp + 1
    absBest <- which.min(rmseps[seq_len(maxIdx)])

    if (absBest > 0) {
        method <- match.arg(method)
        if (is.null(origResponse <- object$y))
            origResponse <-
                c(predict(object, ncomp = 1) + residuals(object)[,1,1])

        ## include LOO prediction with zero components (i.e., the
        ## mean). For the mean we should also use the LOO estimate...
        allresids <- cbind(origResponse -
                               (sum(origResponse) - origResponse) /
                                   (length(origResponse) - 1),
                           object$validation$pred[,1,] - origResponse)

        if (method == "randomization") {
            pvals <- sapply(seq_len(absBest - 1),
                            function(ii) randomiz.test(allresids[,ii],
                                                       allresids[,absBest],
                                                       nperm = nperm))
            idx <- which(pvals > alpha)
            selection <- min(c(idx, absBest)) - 1
        } else {
            residsds <- apply(allresids, 2, sd) / sqrt(nrow(allresids))
            uls <- rmseps - residsds
            selection <- min(which(uls < rmseps[absBest])) - 1
        }

        if (isTRUE(plot)) {
            xvals <- seq_along(rmseps) - 1
            plot(xvals, rmseps, ylab = "RMSEP",
                 xlab = "Number of components", type = "b", ...)
            if (method == "onesigma") {
                arrows(xvals, uls, xvals, rmseps + residsds,
                       code = 3, col = "gray", angle = 90, length = .1)
            } else {
                points(xvals[idx], rmseps[idx], cex = 2, col = 4)
            }
            abline(h = rmseps[absBest], col = "gray", lty = 3)
            abline(v = absBest - 1, col = "gray", lty = 3)
            abline(v = selection, col = "blue", lty = 2)
            legend("topright", legend = c("Abs. minimum", "Selection"),
                   lty = 3:2, col = c("gray", "blue"), bg = "white")
        }

        selection
    } else {
        warning("Lowest CV error found at 0 components, no testing performed")
        0
    }
}