File: NAcoef.R

package info (click to toggle)
robustbase 0.99-6-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 4,584 kB
  • sloc: fortran: 3,245; ansic: 3,243; sh: 15; makefile: 2
file content (154 lines) | stat: -rw-r--r-- 6,145 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
## test handing of NA coefficients / singular fits
## also check:
## -- what would have to be done if class "lm" was added.
## -- general compatibility to class lm.
require(robustbase)
options(digits = 5)# -> higher chance of platform independence

## generate simple example data (almost as in ./weights.R )
data <- expand.grid(x1=letters[1:3], x2=LETTERS[1:3], rep=1:3)
set.seed(1)
data$y <- rnorm(nrow(data))
## drop all combinations of one interaction:
data <- subset(data, x1 != 'c' | (x2 != 'B' & x2 != 'C'))
## add collinear variables
data$x3 <- rnorm(nrow(data))
data$x4 <- rnorm(nrow(data))
data$x5 <- data$x3 + data$x4
## add some NA terms
data$y[1] <- NA
data$x4[2:3] <- NA ## to test anova

## Classical models start with 'cm', robust just with  'rm' (or just 'm'):
cm0 <- lm   (y ~ x1*x2 + x3,	       data)
cm1 <- lm   (y ~ x1*x2 + x3 + x4 + x5, data)
set.seed(2)
rm1 <- lmrob(y ~ x1*x2 + x3 + x4 + x5, data)
m3  <- lmrob(y ~ x1*x2 + x3 + x4,      data) # same column space as rm1
rm0 <- lmrob(y ~ x1*x2 + x3,	       data)

## clean version of rm1 (to check predict)
data2 <- data.frame(y=data$y[-(1:3)], rm1$x[,!is.na(rm1$coef)])
set.seed(2)
rm1c <- lmrob(y ~ x1b + x1c + x2B + x2C + x3 + x4 + x1b:x2B + x1b:x2C, data2)

## add class lm to rm1 (for now)
class(rm1) <- c(class(rm1), "lm")
class(rm0) <- c(class(rm0), "lm")

## the full matrix (data) should be returned by model matrix (frame)
stopifnot(all.equal(model.matrix(cm1), model.matrix(rm1)),
          all.equal(model.frame (cm1), model.frame (rm1)))
## qr decomposition should be for the full data and pivots identical lm result
qr.cm1 <- qr(cm1)$qr
qr.rm1 <- rm1$qr$qr
stopifnot(NCOL(qr.rm1) == NCOL(qr.cm1),
          NROW(qr.rm1) == NROW(qr.cm1),
          length(rm1$qr$qraux) == length(qr(cm1)$qraux),
          all.equal(rm1$qr$pivot, qr(cm1)$pivot),
          all.equal(dimnames(qr.rm1),dimnames(qr.cm1)))
## the alias function should return the same result
stopifnot(all.equal(alias(cm1), alias(rm1)))

####
## these helper functions should print NAs for the dropped coefficients
  print(rm1)
summary(rm1) -> s1
confint(rm1) -> ci1
stopifnot(identical(is.na(coef(cm1)), apply(ci1, 1L, anyNA)),
	  identical(sigma(rm1),                 s1$ sigma),
	  identical(vcov(rm1, complete=FALSE),  s1$ cov  ),
	  TRUE)

print(s1, showAlgo=FALSE)
ci1
## drop1 should return df = 0
#drop1(rm1) ## drop.lm does not return valid results (yet)!

####
## methods that should just drop the NA coefficients
## m3 is actually the same as rm1, so anova should raise an error
tools::assertError(anova(rm1, m3, test="Wald"))
tools::assertError(anova(rm1, m3, test="Deviance"))
## but comparing rm1 and rm0 should be ok
anova(rm1, rm0, test="Wald")
anova(rm1, rm0, test="Deviance")
## commands with single #:
## they do (or might) not return sensible results for robust fits
## and need to be checked again
#cooks.distance(rm1)
#deviance(rm1)
#dfbeta(rm1)
#dfbetas(rm1)
#effects(rm1) ## fails
#extractAIC(rm1)
#influence(rm1)
stopifnot(all.equal(hv1 <- hatvalues(rm1), .lmrob.hat(wqr=rm1$qr), tol=1e-15),
          all.equal(hv1, stats:::hatvalues.lm(rm1), tol=1e-15),
          all.equal(hat(cm1$qr), unname(hatvalues(cm1)), tol=1e-15),
          all.equal(unname(hv1), hat(rm1$qr), tol=1e-15),
          ## ditto :
          all.equal(hv1c <- hatvalues(rm1c), stats:::hatvalues.lm(rm1c), tol=1e-15))

## kappa() & labels() :
stopifnot(is.infinite(kr1 <- kappa(rm1)), kr1 == kappa(cm1), # = +Inf both
          identical(labels(rm1), labels(cm1)))
logLik(rm1)# well, and what does it mean?
## plot(rm1, which=1) ## plot.lmrob() fails "singular covariance" .. FIXME!
par(mfrow=c(2,2))
plot(rm1, which=2:4)
stopifnot(all.equal(predict(rm1), predict(rm1c), tol=1e-15),
          all.equal(predict(rm1,  se.fit=TRUE, interval="confidence"),
		    predict(rm1c, se.fit=TRUE, interval="confidence"), tol=4e-15)) # seen 1.3e-15 (ATLAS)
predict(rm1, type="terms", se.fit=TRUE, interval="confidence")
#proj(rm1) ## fails "FIXME"
residuals(rm1)
#rstandard(rm1)
#rstudent(rm1)
#simulate(rm1) ## just $weights needs to be changed to prior weights
V1 <- vcov(rm1, complete=FALSE)
## but don't show the "eigen" part {vectors may flip sign}:
attributes(V1) <- attributes(V1)[c("dim","dimnames", "weights")]; V1
set.seed(12); sc <- simulate(cm1, 64)
set.seed(12); rc <- simulate(rm1, 64)

stopifnot(all.equal(sqrt(diag(V1)), coef(summary(rm1))[,"Std. Error"], tol=1e-15),
	  all.equal(sc, rc, tolerance = 0.08),# dimension *and* approx. values (no NA)
	  identical(variable.names(rm1), variable.names(cm1)),
	  all.equal(residuals(rm1), residuals(cm1), tolerance = 0.05),# incl. names
	  all.equal(rstudent (rm1), rstudent (cm1), tolerance = 0.06),
	  identical(dimnames(rm1), dimnames(cm1)),
	  all.equal(dummy.coef(rm1), dummy.coef(cm1), tolerance= .5)) ## check mostly structure

## other helper functions
stopifnot(identical(case.names(rm1), case.names(cm1)),
          all.equal(family(rm1), family(cm1)),# identical() upto environment
          identical(formula(rm1), formula(cm1)),
          nobs(rm1) == nobs(cm1))
#add1(rm0, ~ . + x3 + x4 + x5) ## does not return valid results (yet)!


## test other initial estimators
lmrob(y ~ x1*x2 + x3 + x4 + x5, data, init="M-S")
lmrob(y ~ x1*x2 + x3 + x4 + x5, data, init=lmrob.lar)

## test all zero design matrix
data <- data.frame(y=1:10,x1=0,x2=0,os=2,w=c(0.5, 1))
(m5 <- lmrob(y ~ 1+x1+x2+offset(os), data, weights=w))
(sm5 <- summary(m5))
(m6 <- lmrob(y ~ 0+x1+x2+offset(os), data, weights=w))
(sm6 <- summary(m6))

sc5 <- summary(cm5 <- lm(y ~ 1+x1+x2+offset(os), data, weights=w))
sc6 <- summary(cm6 <- lm(y ~ 0+x1+x2+offset(os), data, weights=w))

if(getRversion() <= "3.5.1" && as.numeric(R.version$`svn rev`) < 74993)
    ## in the past, lm() returned logical empty matrix
    storage.mode(sc6$coefficients) <- "double"

stopifnot(all.equal(coef(m5), coef(cm5), tolerance = 0.01),
          all.equal(coef(m6), coef(cm6), tolerance = 1e-14),
          all.equal(coef(sm5), coef(sc5), tolerance = 0.05),
          all.equal(coef(sm6), coef(sc6), tolerance = 1e-14),
          identical(sm5$df, sc5$df),
          identical(sm6$df, sc6$df))