1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
|
'lmorigin' <-
function(formula, data=NULL, origin=TRUE, nperm=999, method=NULL, silent=FALSE)
#
# This program computes a multiple linear regression and performs tests
# of significance of the equation parameters using permutations.
#
# origin=TRUE: the regression line can be forced through the origin. Testing
# the significance in that case requires a special permutation procedure.
#
# Permutation methods: raw data or residuals of full model
# Default method in regression through the origin: raw data
# Default method in ordinary multiple regression: residuals of full model
# - In ordinary multiple regression when m = 1: raw data
#
# Pierre Legendre, March 2009
{
if(!is.null(method)) method <- match.arg(method, c("raw", "residuals"))
if(is.null(method) & origin==TRUE) method <- "raw"
if(is.null(method) & origin==FALSE) method <- "residuals"
if(nperm < 0) stop("Incorrect value for 'nperm'")
## From the formula, find the variables and the number of observations 'n'
toto <- lm(formula, data)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "offset"), names(mf), 0)
mf <- mf[c(1, m)]
mf$drop.unused.levels <- TRUE
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
var.names = colnames(mf) # Noms des variables
y <- as.matrix(mf[,1])
colnames(y) <- var.names[1]
X <- as.matrix(mf[,-1])
n <- nrow(mf)
m <- ncol(X)
a <- system.time({
mm<- m # No. regression coefficients, possibly including the intercept
if(m == 1) method <- "raw"
if(nrow(X) != n) stop("Unequal number of rows in y and X")
if(origin) {
if(!silent) cat("Regression through the origin",'\n')
reg <- lm(y ~ 0 + X)
} else {
if(!silent) cat("Multiple regression with estimation of intercept",'\n')
reg <- lm(y ~ X)
mm <- mm+1
}
if(!silent) {
if(nperm > 0) {
if(method == "raw") {
cat("Permutation method =",method,"data",'\n')
} else {
cat("Permutation method =",method,"of full model",'\n')
}
}
}
t.vec <- summary(reg)$coefficients[,3]
p.param.t <- summary(reg)$coefficients[,4]
df1 <- summary(reg)$fstatistic[[2]]
df2 <- summary(reg)$fstatistic[[3]]
F <- summary(reg)$fstatistic[[1]]
y.res <- summary(reg)$residuals
# b.vec <- summary(reg)$coefficients[,1]
# r.sq <- summary(reg)$r.squared
# adj.r.sq <- summary(reg)$adj.r.squared
# p.param.F <- pf(F, df1, df2, lower.tail=FALSE)
if(df1 < m) stop("\nCollinearity among the X variables. Check using 'lm'")
# Permutation tests
if(nperm > 0) {
nGT.F <- 1
nGT1.t <- rep(1,mm)
nGT2.t <- rep(1,mm)
sign.t <- sign(t.vec)
for(i in 1:nperm) # Permute raw data. Always use this method for F-test
{
if(origin) { # Regression through the origin
dia.bin <- diag((rbinom(n,1,0.5)*2)-1)
y.perm <- dia.bin %*% sample(y)
reg.perm <- lm(y.perm ~ 0 + X)
} else { # Multiple linear regression
y.perm <- sample(y,n)
reg.perm <- lm(y.perm ~ X)
}
# Permutation test of the F-statistic
F.perm <- summary(reg.perm)$fstatistic[1]
if(F.perm >= F) nGT.F <- nGT.F+1
# Permutation tests of the t-statistics: permute raw data
if(method == "raw") {
t.perm <- summary(reg.perm)$coefficients[,3]
if(nperm <= 5) cat(t.perm,'\n')
for(j in 1:mm) {
# One-tailed test in direction of sign
if(t.perm[j]*sign.t[j] >= t.vec[j]*sign.t[j]) nGT1.t[j] <- nGT1.t[j]+1
# Two-tailed test
if( abs(t.perm[j]) >= abs(t.vec[j]) ) nGT2.t[j] <- nGT2.t[j]+1
}
}
}
if(method == "residuals") {
# Permute residuals of full model
for(i in 1:nperm) {
if(origin) { # Regression through the origin
dia.bin <- diag((rbinom(n,1,0.5)*2)-1)
y.perm <- dia.bin %*% sample(y.res)
reg.perm <- lm(y.perm ~ 0 + X)
} else { # Multiple linear regression
y.perm <- sample(y.res,n)
reg.perm <- lm(y.perm ~ X)
}
# Permutation tests of the t-statistics: permute residuals
t.perm <- summary(reg.perm)$coefficients[,3]
if(nperm <= 5) cat(t.perm,'\n')
for(j in 1:mm) {
# One-tailed test in direction of sign
if(t.perm[j]*sign.t[j] >= t.vec[j]*sign.t[j]) nGT1.t[j] <- nGT1.t[j]+1
# Two-tailed test
if( abs(t.perm[j]) >= abs(t.vec[j]) ) nGT2.t[j] <- nGT2.t[j]+1
}
}
}
# Compute the permutational probabilities
p.perm.F <- nGT.F/(nperm+1)
p.perm.t1 <- nGT1.t/(nperm+1)
p.perm.t2 <- nGT2.t/(nperm+1)
### Do not test intercept by permutation of residuals in multiple regression
if(!origin & method=="residuals") {
if(silent) { # Note: silent==TRUE in simulation programs
p.perm.t1[1] <- p.perm.t2[1] <- 1
} else {
p.perm.t1[1] <- p.perm.t2[1] <- NA
}
}
}
})
a[3] <- sprintf("%2f",a[3])
if(!silent) cat("Computation time =",a[3]," sec",'\n')
#
if(nperm == 0) {
out <- list(reg=reg, p.param.t.2tail=p.param.t, p.param.t.1tail=p.param.t/2, origin=origin, nperm=nperm, var.names=var.names, call=match.call())
} else {
out <- list(reg=reg, p.param.t.2tail=p.param.t, p.param.t.1tail=p.param.t/2, p.perm.t.2tail=p.perm.t2, p.perm.t.1tail=p.perm.t1, p.perm.F=p.perm.F, origin=origin, nperm=nperm, method=method, var.names=var.names, call=match.call())
}
#
class(out) <- "lmorigin"
out
}
|