## test handling of NA coefficients / singular fits
## also check:
## -- what would have to be done if class "lm" was added.
## -- general compatibility with class lm.
require(robustbase)
options(digits = 5)# -> higher chance of platform independence
## generate simple example data (almost as in ./weights.R )
data <- expand.grid(x1=letters[1:3], x2=LETTERS[1:3], rep=1:3)
set.seed(1)
data$y <- rnorm(nrow(data))
## drop all combinations of one interaction:
data <- subset(data, x1 != 'c' | (x2 != 'B' & x2 != 'C'))
## add collinear variables
data$x3 <- rnorm(nrow(data))
data$x4 <- rnorm(nrow(data))
data$x5 <- data$x3 + data$x4
## add some NA terms
data$y[1] <- NA
data$x4[2:3] <- NA ## to test anova
## Classical models start with 'cm', robust just with 'rm' (or just 'm'):
cm0 <- lm (y ~ x1*x2 + x3, data)
cm1 <- lm (y ~ x1*x2 + x3 + x4 + x5, data)
set.seed(2)
rm1 <- lmrob(y ~ x1*x2 + x3 + x4 + x5, data)
m3 <- lmrob(y ~ x1*x2 + x3 + x4, data) # same column space as rm1
rm0 <- lmrob(y ~ x1*x2 + x3, data)
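## added sanity check (a sketch, assuming the default na.action = na.omit):
## rm1 involves x4, so all 3 rows containing NAs are dropped, while rm0 only
## loses the single row with NA in y
stopifnot(nobs(rm1) == nrow(data) - 3,
          nobs(rm0) == nrow(data) - 1)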
## clean version of rm1 (to check predict)
data2 <- data.frame(y=data$y[-(1:3)], rm1$x[,!is.na(rm1$coef)])
set.seed(2)
rm1c <- lmrob(y ~ x1b + x1c + x2B + x2C + x3 + x4 + x1b:x2B + x1b:x2C, data2)
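## added check (a sketch): with the same seed, refitting on the reduced,
## full-rank design should reproduce the non-NA coefficients of rm1
stopifnot(all.equal(unname(coef(rm1)[!is.na(coef(rm1))]), unname(coef(rm1c)),
                    tol = 1e-10))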
## add class lm to rm1 and rm0 (for now)
class(rm1) <- c(class(rm1), "lm")
class(rm0) <- c(class(rm0), "lm")
## model.matrix() should return the full matrix, and model.frame() the full data frame
stopifnot(all.equal(model.matrix(cm1), model.matrix(rm1)),
          all.equal(model.frame (cm1), model.frame (rm1)))
## the qr decomposition should be for the full data, with pivots identical to the lm result
qr.cm1 <- qr(cm1)$qr
qr.rm1 <- rm1$qr$qr
stopifnot(NCOL(qr.rm1) == NCOL(qr.cm1),
          NROW(qr.rm1) == NROW(qr.cm1),
          length(rm1$qr$qraux) == length(qr(cm1)$qraux),
          all.equal(rm1$qr$pivot, qr(cm1)$pivot),
          all.equal(dimnames(qr.rm1), dimnames(qr.cm1)))
## the alias function should return the same result
stopifnot(all.equal(alias(cm1), alias(rm1)))
####
## these helper functions should print NAs for the dropped coefficients
print(rm1)
summary(rm1) -> s1
confint(rm1) -> ci1
stopifnot(identical(is.na(coef(cm1)), apply(ci1, 1L, anyNA)),
          identical(sigma(rm1), s1$sigma),
          identical(vcov(rm1, complete=FALSE), s1$cov),
          TRUE)
print(s1, showAlgo=FALSE)
ci1
## drop1 should return df = 0
#drop1(rm1) ## drop.lm does not return valid results (yet)!
####
## methods that should just drop the NA coefficients
## m3 spans the same column space as rm1, so anova should raise an error
tools::assertError(anova(rm1, m3, test="Wald"))
tools::assertError(anova(rm1, m3, test="Deviance"))
## but comparing rm1 and rm0 should be ok
anova(rm1, rm0, test="Wald")
anova(rm1, rm0, test="Deviance")
## commands with a single '#':
## they do not (or might not) return sensible results for robust fits
## and need to be checked again
#cooks.distance(rm1)
#deviance(rm1)
#dfbeta(rm1)
#dfbetas(rm1)
#effects(rm1) ## fails
#extractAIC(rm1)
#influence(rm1)
stopifnot(all.equal(hv1 <- hatvalues(rm1), .lmrob.hat(wqr=rm1$qr), tol=1e-15),
          all.equal(hv1, stats:::hatvalues.lm(rm1), tol=1e-15),
          all.equal(hat(cm1$qr), unname(hatvalues(cm1)), tol=1e-15),
          all.equal(unname(hv1), hat(rm1$qr), tol=1e-15),
          ## ditto :
          all.equal(hv1c <- hatvalues(rm1c), stats:::hatvalues.lm(rm1c), tol=1e-15))
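## added sanity check: hat values are diagonal entries of a projection matrix,
## hence should all lie in [0, 1]
stopifnot(0 <= hv1, hv1 <= 1, 0 <= hv1c, hv1c <= 1)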
## kappa() & labels() :
stopifnot(is.infinite(kr1 <- kappa(rm1)), kr1 == kappa(cm1), # = +Inf both
          identical(labels(rm1), labels(cm1)))
logLik(rm1)# well, and what does it mean?
## plot(rm1, which=1) ## plot.lmrob() fails "singular covariance" .. FIXME!
par(mfrow=c(2,2))
plot(rm1, which=2:4)
stopifnot(all.equal(predict(rm1), predict(rm1c), tol=1e-15),
          all.equal(predict(rm1, se.fit=TRUE, interval="confidence"),
                    predict(rm1c, se.fit=TRUE, interval="confidence"), tol=4e-15)) # seen 1.3e-15 (ATLAS)
predict(rm1, type="terms", se.fit=TRUE, interval="confidence")
#proj(rm1) ## fails "FIXME"
residuals(rm1)
#rstandard(rm1)
#rstudent(rm1)
#simulate(rm1) ## just $weights needs to be changed to prior weights
V1 <- vcov(rm1, complete=FALSE)
## but don't show the "eigen" part {vectors may flip sign}:
attributes(V1) <- attributes(V1)[c("dim","dimnames", "weights")]; V1
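## added illustration (a sketch, assuming vcov()'s complete = TRUE expands the
## aliased coefficients with NA rows/columns, as vcov.lm() does):
is.na(diag(vcov(rm1, complete=TRUE)))  # should match is.na(coef(rm1))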
set.seed(12); sc <- simulate(cm1, 64)
set.seed(12); rc <- simulate(rm1, 64)
stopifnot(all.equal(sqrt(diag(V1)), coef(summary(rm1))[,"Std. Error"], tol=1e-15),
          all.equal(sc, rc, tolerance = 0.08), # dimension *and* approx. values (no NA)
          identical(variable.names(rm1), variable.names(cm1)),
          all.equal(residuals(rm1), residuals(cm1), tolerance = 0.05), # incl. names
          all.equal(rstudent (rm1), rstudent (cm1), tolerance = 0.06),
          identical(dimnames(rm1), dimnames(cm1)),
          all.equal(dummy.coef(rm1), dummy.coef(cm1), tolerance = .5)) ## check mostly structure
## other helper functions
stopifnot(identical(case.names(rm1), case.names(cm1)),
          all.equal(family(rm1), family(cm1)), # identical() up to environment
          identical(formula(rm1), formula(cm1)),
          nobs(rm1) == nobs(cm1))
#add1(rm0, ~ . + x3 + x4 + x5) ## does not return valid results (yet)!
## test other initial estimators
lmrob(y ~ x1*x2 + x3 + x4 + x5, data, init="M-S")
lmrob(y ~ x1*x2 + x3 + x4 + x5, data, init=lmrob.lar)
## test an all-zero design matrix
data <- data.frame(y=1:10,x1=0,x2=0,os=2,w=c(0.5, 1))
(m5 <- lmrob(y ~ 1+x1+x2+offset(os), data, weights=w))
(sm5 <- summary(m5))
(m6 <- lmrob(y ~ 0+x1+x2+offset(os), data, weights=w))
(sm6 <- summary(m6))
sc5 <- summary(cm5 <- lm(y ~ 1+x1+x2+offset(os), data, weights=w))
sc6 <- summary(cm6 <- lm(y ~ 0+x1+x2+offset(os), data, weights=w))
if(getRversion() <= "3.5.1" && as.numeric(R.version$`svn rev`) < 74993)
    ## in the past, lm() returned a logical empty matrix
    storage.mode(sc6$coefficients) <- "double"
stopifnot(all.equal(coef(m5), coef(cm5), tolerance = 0.01),
          all.equal(coef(m6), coef(cm6), tolerance = 1e-14),
          all.equal(coef(sm5), coef(sc5), tolerance = 0.05),
          all.equal(coef(sm6), coef(sc6), tolerance = 1e-14),
          identical(sm5$df, sc5$df),
          identical(sm6$df, sc6$df))