File: linmod.R

package info (click to toggle)
r-cran-plotmo 3.7.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,400 kB
  • sloc: sh: 13; makefile: 2
file content (283 lines) | stat: -rw-r--r-- 11,115 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# linmod.R: Example S3 linear model.
#
# See www.milbo.org/doc/modguide.pdf.
# This software may be freely used.

linmod <- function(...) UseMethod("linmod")

linmod.default <- function(x = stop("no 'x' argument"),
                           y = stop("no 'y' argument"),
                           keep = FALSE,
                           ...)
{
    stop.if.dot.arg.used(...)
    xmat <- as.matrix(x)
    # use name "(Intercept)" here so coef names match linmod.formula
    x <- cbind("(Intercept)" = 1, xmat)
    fit <- linmod.fit(x, y)
    fit$call <- match.call()
    if(keep) {
        fit$x <- xmat
        # save y as a one-column matrix, so can use colname to save response name
        colname <- deparse(substitute(y))[1]
        colname <- gsub(" ", "", substr(colname, 1, 100)) # strip spaces, truncate
        fit$y <- as.matrix(y, ncol = 1)
        colnames(fit$y) <- colname
    }
    fit
}
linmod.formula <- function(formula = stop("no 'formula' argument"),
                           data = parent.frame(),
                           keep = FALSE,
                           ...)
{
    stop.if.dot.arg.used(...)
    if(is.matrix(data))             # allow data to be a matrix
        data <- as.data.frame(data) # will create colnames V1 V2 V3 if necessary
    # note that na.action=na.pass because we will catch NAs later
    # in linmod.fit, for uniformity with linmod.default
    mf <- model.frame(formula = formula, data = data, na.action = na.pass)
    terms <- attr(mf, "terms")
    x <- model.matrix(terms, mf)
    y <- model.response(mf)
    fit <- linmod.fit(x, y)
    fit$call <- match.call()
    fit$terms <- terms
    fit$xlevels <- .getXlevels(terms, mf) # for use by predict.linmod
    if(keep)
        fit$data <- data
    fit
}
linmod.fit <- function(x = stop("no 'x' argument"),
                       y = stop("no 'y' argument"),
                       ...)
{
    # internal function, not for the casual user
    # if model has an intercept, the first col of x must be intercept (all 1s)

    stop.if.dot.arg.used(...)
    x <- check.linmod.x(x)
    y <- check.linmod.y(x, y)
    fit <- do.linmod.fit(x, y)
    class(fit) <- "linmod"
    fit
}
check.linmod.x <- function(x)
{
    if(!is.matrix(x))
        stop("'x' is not a matrix or could not be converted to a matrix")
    if(NROW(x) == 0 || NCOL(x) == 0)
        stop("'x' is empty")
    if(anyNA(x))
        stop("NA in 'x'")
    # checking just the first column of x suffices because all columns
    # of a matrix have the same type
    # we allow is.logical because qr etc. treat logical vars as numeric
    if(!is.numeric(x[,1]) && !is.logical(x[,1]))
        stop("non-numeric column in 'x'")
    # ensure all columns in x are named (needed for names in vcov etc.)
    # use the same naming convention as lm (prefix for unnamed cols is "V")
    missing.colnames <-
        if(is.null(colnames(x))) 1:NCOL(x)
        else                     nchar(colnames(x)) == 0
    colnames(x)[missing.colnames] <-
        c("(Intercept)",
          paste("V", seq_len(NCOL(x) - 1), sep = ""))[missing.colnames]
    duplicated <- which(duplicated(colnames(x)))
    if(length(duplicated))
        stop("column name \"", colnames(x)[duplicated[1]],
             "\" in 'x' is duplicated")
    x
}
check.linmod.y <- function(x, y)
{
    # as.vector(as.matrix(y)) is necessary when y is a data.frame
    # (because as.vector alone on a data.frame returns a data.frame)
    y <- as.vector(as.matrix(y))
    if(length(y) == 0)
        stop("'y' is empty")
    if(anyNA(y))
        stop("NA in 'y'")
    if(!is.numeric(y) && !is.logical(y))
        stop("'y' is not numeric or logical")
    if(length(y) != nrow(x))
        stop("nrow(x) is ", nrow(x), " but length(y) is ", length(y))
    y
}
do.linmod.fit <- function(x, y)
{
    # workhorse function for fitting linear models
    # essential processing and sanity checks on x and y are already completed
    # x is a numeric matrix, y is a numeric vector

    qx <- qr(x)                         # QR-decomposition of x
    if(qx$rank < ncol(x))
        stop("'x' is singular (it has ", ncol(x),
             " columns but its rank is ", qx$rank, ")\n  colnames(x): ",
             paste0(colnames(x), collapse=' '))
    coef <- solve.qr(qx, y)             # compute (x'x)^(-1) x'y
    stopifnot(!anyNA(coef))             # NA impossible after rank check above
    df.residual <- max(0, nrow(x) - ncol(x)) # degrees of freedom
    sigma2 <- sum((y - x %*% coef)^2) / df.residual # variance of residuals
    vcov <- sigma2 * chol2inv(qx$qr)    # covar mat is sigma^2 * (x'x)^(-1)
    fitted.values <- qr.fitted(qx, y)

    colnames(vcov) <- rownames(vcov) <- colnames(x)
    names(fitted.values) <- rownames(x)
    colnames(coef) <- colnames(y)

    # returned fields match lm's fields
    list(coefficients  = coef,
         residuals     = y - fitted.values,
         rank          = qx$rank,
         fitted.values = fitted.values,
         vcov          = vcov,
         sigma         = sqrt(sigma2),
         df.residual   = df.residual)
}
predict.linmod <- function(object = stop("no 'object' argument"),
                           newdata = NULL,
                           type = "response",
                           ...)
{
    stopifnot(inherits(object, "linmod"))
    stop.if.dot.arg.used(...)
    match.arg(type, "response") # the type argument is not yet supported
    if(is.null(newdata))
        yhat <- fitted(object)
    else {
        if(NROW(newdata) == 0)
            stop("'newdata' is empty")      # preempt obscure message later

        x <- if(is.null(object$terms))      # model built with linmod.default?
                process.newdata(object, newdata)
            else                            # model built with linmod.formula
                process.newdata.formula(object, newdata)

        # The following tests suffice to catch all illegal input.  However
        # they aren't ideal in that they don't always direct you to the root
        # cause of the problem (i.e. the error messages aren't always optimal).
        nvar <- length(object$coefficients) - 1 # nbr vars, -1 for intercept
        if(ncol(x) - 1 != nvar)
            stop("ncol(newdata) is ", ncol(x) - 1, " but should be ", nvar)
        if(anyNA(x))
            stop("NA in 'newdata'")
        if(!is.numeric(x[,1]) && !is.logical(x[,1]))
            stop("non-numeric column in 'newdata' (after processing)")

        yhat <- as.vector(do.predict.linmod(object, x))
        names(yhat) <- rownames(x)
    }
    yhat
}
process.newdata <- function(object, newdata)
{
    # process newdata for models built with linmod.default

    x <- if(is.vector(newdata))     # allow newdata to be a vector
            matrix(newdata, ncol = length(object$coefficients) - 1)
         else
            as.matrix(newdata)      # allow newdata to be a data.frame

    cbind(1, x)                     # return data with an intercept column
}
process.newdata.formula <- function(object, newdata)
{
    # process newdata for models built with linmod.formula

    newdata <- as.data.frame(newdata) # allows newdata to be a matrix
    terms <- object$terms
    dataClasses <- attr(terms, "dataClasses")
    iresp <- attr(terms, "response")
    terms <- delete.response(terms)
    # na.action=na.pass because we will catch NAs after (for clearer error msg)
    # xlevels is needed to convert strings to factor levels, for example:
    #    a <- linmod(Sepal.Length~Species,data=iris)
    #    predict(a,newdata=data.frame(Species="setosa"))
    mf <- model.frame(terms, newdata, na.action = na.pass, xlev = object$xlevels)
    if(anyNA(mf))
        stop("NA in 'newdata'")
    if(NROW(mf) != NROW(newdata)) {
        # Get here when model.frame() issues
        #   Warning: 'newdata' had M rows but variables found have N rows
        # Must stop, else the call to model.matrix() below would silently return bad data.

        # If a variable is missing, print its name to help the user.
        # TODO This will erroneously identify "sqrt(x)" as a missing var in the
        #      formula "y ~ sqrt(x)" (because the var is wrapped in a func call).
        varnames <- names(dataClasses)
        varnames <- varnames[-iresp]
        missing <- which(!(varnames %in% colnames(newdata)))
        missing.msg <- ""
        if(length(missing))
            missing.msg <- paste0(" (variable '", varnames[missing[1]],
                                  "' may be missing from newdata)")
        stop("newdata has ", NROW(newdata),
             " rows but model.frame returned ", NROW(mf), " rows", missing.msg)
    }
    .checkMFClasses(dataClasses, mf) # check types in newdata match original data
    model.matrix(terms, mf)
}
do.predict.linmod <- function(object, x)
{
    # workhorse function for linear model predictions
    # processing by model.matrix etc. and sanity checks on x already completed
    # x is a numeric matrix (if model has intercept, first col of x is all 1s)

    x %*% coef(object)
}
summary.linmod <- function(object = stop("no 'object' argument"), ...)
{
    stop.if.dot.arg.used(...)
    se <- sqrt(diag(object$vcov))
    t.value <- coef(object) / se
    p.value <-
        if(object$df.residual == 0) # avoid warning from pt()
            rep_len(0, length.out=length(t.value))
        else
            2 * pt(-abs(t.value), df = object$df.residual)

    coefficients <- cbind(Estimate = coef(object),
                          StdErr   = se,
                          t.value  = t.value,
                          p.value  = p.value)

    retval <- list(call         = object$call,
                   coefficients = coefficients)

    class(retval) <- "summary.linmod"

    retval
}
print.linmod <- function(x = stop("no 'x' argument"), ...)
{
    stop.if.dot.arg.used(...)
    print.model.call(x)
    print(x$coefficients)
    invisible(x)
}
print.summary.linmod <- function(x = stop("no 'x' argument"), ...)
{
    stop.if.dot.arg.used(...)
    print.model.call(x)
    print(x$coefficients)
    invisible(x)
}
print.model.call <- function(x)
{
    cat("Call: ") # print.lm has a newline here, but a space is more compact
    # use paste0 to convert vector of strings to single string if necessary
    cat(strwrap(paste0(deparse(x$call, control = NULL, nlines = 5),
                       sep = " ", collapse = " "), exdent = 6), sep = "\n")
    cat("\n")
}
# stop.if.dot.arg.used will cause an error message if any args are passed to it.
# We use it to test if any dots arg of the calling function was used, for
# functions that must have a dots arg (to match the generic method) but don't
# actually use the dots.  This helps the user catch mistyped or illegal args.
# R version 3.3-0 or higher has a function chkDots which could be used instead.

stop.if.dot.arg.used <- function()
{
    NULL
}