File: tolEllipse.R

package info (click to toggle)
robustbase 0.99-7-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 4,596 kB
  • sloc: fortran: 3,245; ansic: 3,243; sh: 15; makefile: 2
file content (133 lines) | stat: -rw-r--r-- 4,609 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#### This is from the R package
####
####  rrcov : Scalable Robust Estimators with High Breakdown Point
####
#### by Valentin Todorov

### This program is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation; either version 2 of the License, or
### (at your option) any later version.
###
### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
### GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License
### along with this program; if not, a copy is available at
### http://www.r-project.org/Licenses/

##   I would like to thank Peter Filzmoser for providing the initial code of
##   this function.

tolEllipsePlot <-
    function(x, m.cov = covMcd(x), cutoff = NULL, id.n = NULL,
	     classic = FALSE, tol = 1e-07,
	     xlab = "", ylab = "", main = "Tolerance ellipse (97.5%)",
	     txt.leg = c("robust", "classical"),
	     col.leg = c("red", "blue"),
	     lty.leg = c("solid","dashed"))
{

##@bdescr
## Tolerance Ellipse Plot:
##    Plots the 97.5% tolerance ellipse of the bivariate data set (x).
##    The ellipse is defined by those data points whose distance (dist)
##    is equal to the squareroot of the 97.5% chisquare quantile with
##    2 degrees of freedom.

##@edescr
##
##@in  x	: [matrix] A data.frame or matrix, n > 2*p
##@in  m.cov	: [mcd object] An object of type mcd - its attributes
##				center and cov will be used
##@in  cutoff	: [number] Distance needed to flag data points outside the ellipse
##@in  outflag	: [logical] Whether to print the labels of the outliers
##@in  tol	: [number] tolerance to be used for computing the inverse see 'solve'.
##		    defaults to 1e-7

## MM: This is nothing else but a version  cluster::ellipsoidPoints() !! -- FIXME
    ellips <- function(loc, cov) {
	## calculates a 97,5% ellipsoid
	## input: data set, location and covariance estimate, cutoff

	dist <- sqrt(qchisq(0.975, 2))
	A <- solve(cov)
	eA <- eigen(A)
	ev <- eA$values
	lambda1 <- max(ev)
	lambda2 <- min(ev)
	eigvect <- eA$vectors[, order(ev)[2]]
	z <- seq(0, 2 * pi, by = 0.01)
	z1 <- dist/sqrt(lambda1) * cos(z)
	z2 <- dist/sqrt(lambda2) * sin(z)
	alfa <- atan(eigvect[2]/eigvect[1])
	r <- matrix(c(cos(alfa),  - sin(alfa), sin(alfa), cos(alfa)), ncol = 2)
	t(loc + t(cbind(z1, z2) %*% r))	#   xmin <- min(x, z[, 1])
    }

    ##	parameters and preconditions

    if(is.data.frame(x))
        x <- data.matrix(x)
    if(!is.matrix(x) || !is.numeric(x))
        stop("x is not a numeric dataframe or matrix.")

    n <- dim(x)[1]
    p <- dim(x)[2]

    if(p != 2)
	stop("Dimension {= ncol(x)} must be 2!")

    if(!is.numeric(m.cov$center) ||  !is.numeric(m.cov$cov))
	stop("argument 'm.cov' must have numeric components 'center' and 'cov'")

    x.loc <- m.cov$center
    x.cov <- n/(n - 1) * m.cov$cov
    xM <- colMeans(x)
    z1 <- ellips(loc = xM, cov = n/(n - 1) * cov.wt(x)$cov)
    z2 <- ellips(loc = x.loc, cov = x.cov)
    x1 <- c(min(x[, 1], z1[, 1], z2[, 1]), max(x[,1],z1[,1], z2[,1]))
    y1 <- c(min(x[, 2], z1[, 2], z2[, 2]), max(x[,2],z1[,2], z2[,2]))

    md <- sqrt(mahalanobis(x, xM, cov(x), tol=tol))
    rd <- sqrt(mahalanobis(x,m.cov$center, m.cov$cov, tol=tol))

    ## Note: the *calling* function may pass a 'missing' value
    if(missing(cutoff) || is.null(cutoff))
	cutoff <- sqrt(qchisq(0.975, df = 2))
    if(missing(id.n) || is.null(id.n))
	id.n <- sum(rd > cutoff)

    ### (2,1) is wrong for 'classic' -- we *overplot*:
    ## if(classic)
    ##  opr <- if(prod(par("mfrow"))== 1) par(mfrow=c(1,2), pty="m") else list()
    ## MM: this is *NOT* good :
    ## else par(mfrow = c(1, 1))

##  1. Robust tolerance
##  define the plot, plot a box, plot the "good" points,
##  plot the outliers either as points or as numbers depending on outflag,
##  plot the ellipse, write a title of the plot
    plot(x, xlim = x1, ylim = y1, xlab = xlab, ylab = ylab, main = main)
    box()
    xrange <- par("usr")
    xrange <- xrange[2] - xrange[1]
    if(id.n >= 1) {
	ind <- sort(rd, index.return=TRUE)$ix[(n-id.n+1):n]
	text(x[ind, 1] + xrange/50, x[ind, 2], ind)
    }

    points(z2, type = "l", lty=lty.leg[1], col=col.leg[1])

##  2. Classical tolerance
    if(classic){
	points(z1, type = "l", lty=lty.leg[2], col=col.leg[2])
	legend("bottomright", txt.leg, lty = lty.leg, col = col.leg)

        ## par(opr)
    }

    invisible()
}