1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
|
#### This is from the R package
####
#### rrcov : Scalable Robust Estimators with High Breakdown Point
####
#### by Valentin Todorov
### This program is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation; either version 2 of the License, or
### (at your option) any later version.
###
### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
### GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License
### along with this program; if not, a copy is available at
### http://www.r-project.org/Licenses/
## I would like to thank Peter Filzmoser for providing the initial code of
## this function.
tolEllipsePlot <-
function(x, m.cov = covMcd(x), cutoff = NULL, id.n = NULL,
classic = FALSE, tol = 1e-07,
xlab = "", ylab = "", main = "Tolerance ellipse (97.5%)",
txt.leg = c("robust", "classical"),
col.leg = c("red", "blue"),
lty.leg = c("solid","dashed"))
{
##@bdescr
## Tolerance Ellipse Plot:
## Plots the 97.5% tolerance ellipse of the bivariate data set (x).
## The ellipse is defined by those data points whose distance (dist)
## is equal to the squareroot of the 97.5% chisquare quantile with
## 2 degrees of freedom.
##@edescr
##
##@in x : [matrix] A data.frame or matrix, n > 2*p
##@in m.cov : [mcd object] An object of type mcd - its attributes
## center and cov will be used
##@in cutoff : [number] Distance needed to flag data points outside the ellipse
##@in outflag : [logical] Whether to print the labels of the outliers
##@in tol : [number] tolerance to be used for computing the inverse see 'solve'.
## defaults to 1e-7
## MM: This is nothing else but a version cluster::ellipsoidPoints() !! -- FIXME
ellips <- function(loc, cov) {
## calculates a 97,5% ellipsoid
## input: data set, location and covariance estimate, cutoff
dist <- sqrt(qchisq(0.975, 2))
A <- solve(cov)
eA <- eigen(A)
ev <- eA$values
lambda1 <- max(ev)
lambda2 <- min(ev)
eigvect <- eA$vectors[, order(ev)[2]]
z <- seq(0, 2 * pi, by = 0.01)
z1 <- dist/sqrt(lambda1) * cos(z)
z2 <- dist/sqrt(lambda2) * sin(z)
alfa <- atan(eigvect[2]/eigvect[1])
r <- matrix(c(cos(alfa), - sin(alfa), sin(alfa), cos(alfa)), ncol = 2)
t(loc + t(cbind(z1, z2) %*% r)) # xmin <- min(x, z[, 1])
}
## parameters and preconditions
if(is.data.frame(x))
x <- data.matrix(x)
if(!is.matrix(x) || !is.numeric(x))
stop("x is not a numeric dataframe or matrix.")
n <- dim(x)[1]
p <- dim(x)[2]
if(p != 2)
stop("Dimension {= ncol(x)} must be 2!")
if(!is.numeric(m.cov$center) || !is.numeric(m.cov$cov))
stop("argument 'm.cov' must have numeric components 'center' and 'cov'")
x.loc <- m.cov$center
x.cov <- n/(n - 1) * m.cov$cov
xM <- colMeans(x)
z1 <- ellips(loc = xM, cov = n/(n - 1) * cov.wt(x)$cov)
z2 <- ellips(loc = x.loc, cov = x.cov)
x1 <- c(min(x[, 1], z1[, 1], z2[, 1]), max(x[,1],z1[,1], z2[,1]))
y1 <- c(min(x[, 2], z1[, 2], z2[, 2]), max(x[,2],z1[,2], z2[,2]))
md <- sqrt(mahalanobis(x, xM, cov(x), tol=tol))
rd <- sqrt(mahalanobis(x,m.cov$center, m.cov$cov, tol=tol))
## Note: the *calling* function may pass a 'missing' value
if(missing(cutoff) || is.null(cutoff))
cutoff <- sqrt(qchisq(0.975, df = 2))
if(missing(id.n) || is.null(id.n))
id.n <- sum(rd > cutoff)
### (2,1) is wrong for 'classic' -- we *overplot*:
## if(classic)
## opr <- if(prod(par("mfrow"))== 1) par(mfrow=c(1,2), pty="m") else list()
## MM: this is *NOT* good :
## else par(mfrow = c(1, 1))
## 1. Robust tolerance
## define the plot, plot a box, plot the "good" points,
## plot the outliers either as points or as numbers depending on outflag,
## plot the ellipse, write a title of the plot
plot(x, xlim = x1, ylim = y1, xlab = xlab, ylab = ylab, main = main)
box()
xrange <- par("usr")
xrange <- xrange[2] - xrange[1]
if(id.n >= 1) {
ind <- sort(rd, index.return=TRUE)$ix[(n-id.n+1):n]
text(x[ind, 1] + xrange/50, x[ind, 2], ind)
}
points(z2, type = "l", lty=lty.leg[1], col=col.leg[1])
## 2. Classical tolerance
if(classic){
points(z1, type = "l", lty=lty.leg[2], col=col.leg[2])
legend("bottomright", txt.leg, lty = lty.leg, col = col.leg)
## par(opr)
}
invisible()
}
|