File: lmorigin.R

package info (click to toggle)
r-cran-ape 5.8-1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,676 kB
  • sloc: ansic: 7,676; cpp: 116; sh: 17; makefile: 2
file content (163 lines) | stat: -rw-r--r-- 5,158 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
'lmorigin' <-
function(formula, data=NULL, origin=TRUE, nperm=999, method=NULL, silent=FALSE)
#
# This program computes a multiple linear regression and performs tests
# of significance of the equation parameters using permutations.
#
# origin=TRUE: the regression line can be forced through the origin. Testing
# the significance in that case requires a special permutation procedure.
#
# Permutation methods: raw data or residuals of full model
#    Default method in regression through the origin: raw data
#    Default method in ordinary multiple regression: residuals of full model
#    - In ordinary multiple regression when m = 1: raw data
#
#         Pierre Legendre, March 2009
{
if(!is.null(method)) method <- match.arg(method, c("raw", "residuals"))
if(is.null(method) & origin==TRUE) method <- "raw"
if(is.null(method) & origin==FALSE) method <- "residuals"
if(nperm < 0) stop("Incorrect value for 'nperm'")

## From the formula, find the variables and the number of observations 'n'
toto <- lm(formula, data)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "offset"), names(mf), 0)
mf <- mf[c(1, m)]
mf$drop.unused.levels <- TRUE
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
var.names = colnames(mf)   # Noms des variables
y <- as.matrix(mf[,1])
colnames(y) <- var.names[1]
X <- as.matrix(mf[,-1])
n <- nrow(mf)
m <- ncol(X)

a <- system.time({
mm<- m           # No. regression coefficients, possibly including the intercept
if(m == 1) method <- "raw"
if(nrow(X) != n) stop("Unequal number of rows in y and X")

if(origin) {
	if(!silent) cat("Regression through the origin",'\n')
	reg <- lm(y ~ 0 + X)
	} else {
	if(!silent) cat("Multiple regression with estimation of intercept",'\n')
	reg <- lm(y ~ X)
	mm <- mm+1
	}

if(!silent) {
	if(nperm > 0) {
		if(method == "raw") {
			cat("Permutation method =",method,"data",'\n')
			} else {
			cat("Permutation method =",method,"of full model",'\n')
			}
		}
	}
t.vec      <- summary(reg)$coefficients[,3]
p.param.t  <- summary(reg)$coefficients[,4]
df1        <- summary(reg)$fstatistic[[2]]
df2        <- summary(reg)$fstatistic[[3]]
F          <- summary(reg)$fstatistic[[1]]
y.res      <- summary(reg)$residuals
# b.vec      <- summary(reg)$coefficients[,1]
# r.sq       <- summary(reg)$r.squared
# adj.r.sq   <- summary(reg)$adj.r.squared
# p.param.F  <- pf(F, df1, df2, lower.tail=FALSE)

if(df1 < m) stop("\nCollinearity among the X variables. Check using 'lm'")

# Permutation tests
if(nperm > 0) {

	nGT.F  <- 1
	nGT1.t <- rep(1,mm)
	nGT2.t <- rep(1,mm)
	sign.t <- sign(t.vec)

	for(i in 1:nperm)  # Permute raw data. Always use this method for F-test
		{
		if(origin) {   # Regression through the origin
			dia.bin <- diag((rbinom(n,1,0.5)*2)-1)
			y.perm <- dia.bin %*% sample(y)
			reg.perm <- lm(y.perm ~ 0 + X)
			} else {   # Multiple linear regression
			y.perm <- sample(y,n)
			reg.perm <- lm(y.perm ~ X)
			}

		# Permutation test of the F-statistic
		F.perm <- summary(reg.perm)$fstatistic[1]
		if(F.perm >= F) nGT.F <- nGT.F+1

		# Permutation tests of the t-statistics: permute raw data
		if(method == "raw") {
			t.perm <- summary(reg.perm)$coefficients[,3]
			if(nperm <= 5) cat(t.perm,'\n')
			for(j in 1:mm) {
				# One-tailed test in direction of sign
				if(t.perm[j]*sign.t[j] >= t.vec[j]*sign.t[j]) nGT1.t[j] <- nGT1.t[j]+1
				# Two-tailed test
				if( abs(t.perm[j]) >= abs(t.vec[j]) ) nGT2.t[j] <- nGT2.t[j]+1
				}
			}
		}

	if(method == "residuals") {
	# Permute residuals of full model
		for(i in 1:nperm) {
			if(origin) {   # Regression through the origin
				dia.bin <- diag((rbinom(n,1,0.5)*2)-1)
				y.perm <- dia.bin %*% sample(y.res)
				reg.perm <- lm(y.perm ~ 0 + X)
				} else {   # Multiple linear regression
				y.perm <- sample(y.res,n)
				reg.perm <- lm(y.perm ~ X)
				}

			# Permutation tests of the t-statistics: permute residuals
			t.perm <- summary(reg.perm)$coefficients[,3]
			if(nperm <= 5) cat(t.perm,'\n')
			for(j in 1:mm) {
				# One-tailed test in direction of sign
				if(t.perm[j]*sign.t[j] >= t.vec[j]*sign.t[j]) nGT1.t[j] <- nGT1.t[j]+1
				# Two-tailed test
				if( abs(t.perm[j]) >= abs(t.vec[j]) ) nGT2.t[j] <- nGT2.t[j]+1
				}
			}
		}
	# Compute the permutational probabilities
	p.perm.F <- nGT.F/(nperm+1)
	p.perm.t1 <- nGT1.t/(nperm+1)
	p.perm.t2 <- nGT2.t/(nperm+1)

	### Do not test intercept by permutation of residuals in multiple regression
	if(!origin & method=="residuals") {
		if(silent) {   # Note: silent==TRUE in simulation programs
			p.perm.t1[1] <- p.perm.t2[1] <- 1
			} else {
			p.perm.t1[1] <- p.perm.t2[1] <- NA
			}
		}
	}

})
a[3] <- sprintf("%2f",a[3])
if(!silent) cat("Computation time =",a[3]," sec",'\n')
#
if(nperm == 0) {

	out <- list(reg=reg, p.param.t.2tail=p.param.t, p.param.t.1tail=p.param.t/2, origin=origin, nperm=nperm, var.names=var.names, call=match.call())

	} else {

	out <- list(reg=reg, p.param.t.2tail=p.param.t, p.param.t.1tail=p.param.t/2, p.perm.t.2tail=p.perm.t2, p.perm.t.1tail=p.perm.t1, p.perm.F=p.perm.F, origin=origin, nperm=nperm, method=method, var.names=var.names, call=match.call())

	}
#
class(out) <- "lmorigin"
out
}