File: predict.BTglmmPQL.R

package info (click to toggle)
r-cran-bradleyterry2 1.1-2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 1,208 kB
  • sloc: sh: 13; makefile: 5
file content (202 lines) | stat: -rwxr-xr-x 9,017 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#' Predict Method for BTglmmPQL Objects
#' 
#' Obtain predictions and optionally standard errors of those predictions from
#' a `"BTglmmPQL"` object.
#' 
#' If `newdata` is omitted the predictions are based on the data used for
#' the fit.  In that case how cases with missing values in the original fit are
#' treated is determined by the `na.action` argument of that fit.  If
#' `na.action = na.omit` omitted cases will not appear in the residuals,
#' whereas if `na.action = na.exclude` they will appear (in predictions
#' and standard errors), with residual value `NA`.  See also
#' `napredict`.
#' 
#' Standard errors for the predictions are approximated assuming the variance
#' of the random effects is known, see Booth and Hobert (1998).
#' 
#' @param object a fitted object of class `"BTglmmPQL"`
#' @param newdata (optional) a data frame in which to look for variables with
#' which to predict.  If omitted, the fitted linear predictors are used.
#' @param newrandom if `newdata` is provided, a corresponding design
#' matrix for the random effects, will columns corresponding to the random
#' effects estimated in the original model.
#' @param level an integer vector giving the level(s) at which predictions are
#' required. Level zero corresponds to population-level predictions (fixed
#' effects only), whilst level one corresponds to the individual-level
#' predictions (full model) which are NA for contests involving individuals not
#' in the original data. By default `level = 0` if the model converged to a 
#' fixed effects model, `1` otherwise.
#' @param type the type of prediction required.  The default is on the scale of
#' the linear predictors; the alternative `"response"` is on the scale of
#' the response variable. Thus for a default binomial model the default
#' predictions are of log-odds (probabilities on logit scale) and `type =
#' "response"` gives the predicted probabilities. The `"terms"` option
#' returns a matrix giving the fitted values of each term in the model formula
#' on the linear predictor scale (fixed effects only).
#' @param se.fit logical switch indicating if standard errors are required.
#' @param terms with `type ="terms"` by default all terms are returned.  A
#' character vector specifies which terms are to be returned.
#' @param na.action function determining what should be done with missing
#' values in `newdata`.  The default is to predict `NA`.
#' @param \dots further arguments passed to or from other methods.
#' @return If `se.fit = FALSE`, a vector or matrix of predictions.  If
#' `se = TRUE`, a list with components \item{fit }{Predictions}
#' \item{se.fit }{Estimated standard errors}
#' @author Heather Turner
#' @seealso [predict.glm()], [predict.BTm()]
#' @references Booth, J. G. and Hobert, J. P. (1998). Standard errors of
#' prediction in Generalized Linear Mixed Models. *Journal of the American
#' Statistical Association* **93**(441), 262 -- 272.
#' @keywords models
#' @examples
#' 
#' seedsModel <- glmmPQL(cbind(r, n - r) ~ seed + extract,
#'                       random = diag(nrow(seeds)),
#'                       family = binomial,
#'                       data = seeds)
#' 
#' pred <- predict(seedsModel, level = 0)
#' predTerms <- predict(seedsModel, type = "terms")
#' 
#' all.equal(pred, rowSums(predTerms) + attr(predTerms, "constant"))
#' 
#' @importFrom stats .checkMFClasses coef delete.response family model.frame model.matrix na.exclude na.pass napredict
#' @export
predict.BTglmmPQL <- function(object, newdata = NULL, newrandom = NULL,
                              level = ifelse(object$sigma == 0, 0, 1), 
                              type = c("link", "response", "terms"),
                              se.fit = FALSE, terms = NULL,
                              na.action = na.pass, ...) {
    ## only pass on if a glm
    if (object$sigma == 0) {
        if (level != 0) warning("Fixed effects model: setting level to 0")
        return(NextMethod())
    }
    if (!all(level %in% c(0, 1))) stop("Only level %in% c(0, 1) allowed")
    type <- match.arg(type)
    if (!is.null(newdata) || type == "terms") tt <- terms(object)
    if (!is.null(newdata)) {
        ## newdata should give variables in terms formula
        Terms <- delete.response(tt)
        m <- model.frame(Terms, newdata, na.action = na.action,
                         xlev = object$xlevels)
        na.action <- attr(m, "na.action")
        if (!is.null(cl <- attr(Terms, "dataClasses")))
            .checkMFClasses(cl, m)
        D <- model.matrix(Terms, m, contrasts.arg = object$contrasts)
        np <- nrow(D) # n predictions
        offset <- rep(0, np)
        if (!is.null(off.num <- attr(tt, "offset")))
            for (i in off.num) offset <- offset + eval(attr(tt,
                "variables")[[i + 1]], newdata)
        if (!is.null(object$call$offset))
            offset <- offset + eval(object$call$offset, newdata)
    }
    else {
        D <- model.matrix(object)
        newrandom <- object$random
        na.action <- object$na.action
        offset <- object$offset
    }
    cf <- coef(object)
    keep <- !is.na(cf)
    aa <- attr(D, "assign")[keep]
    cf <- cf[keep]
    D <- D[, keep, drop = FALSE]
    if (se.fit == TRUE) {
        sigma <- object$sigma
        w <- sqrt(object$weights)
        wX <- w * model.matrix(object)[, keep]
        wZ <- w * object$random
        XWX <- crossprod(wX)
        XWZ <- crossprod(wX, wZ)
        ZWZ <- crossprod(wZ, wZ)
        diag(ZWZ) <- diag(ZWZ) + 1/sigma^2
        K <- cbind(XWX, XWZ)
        K <- chol(rbind(K, cbind(t(XWZ), ZWZ)))
        if (type == "terms" || 0 %in% level){
            ## work out (chol of inverse of) topleft of K-inv directly
            A <- backsolve(chol(ZWZ), t(XWZ), transpose = TRUE)
            A <- chol(XWX - t(A) %*% A)
        }
    }
    if (type == "terms") { # ignore level
        if (1 %in% level)
            warning("type = \"terms\": setting level to 0", call. = FALSE)
        ll <- attr(tt, "term.labels")
        if (!is.null(terms)) {
            include <- ll %in% terms
            ll <- ll[include]
        }
        hasintercept <- attr(tt, "intercept") > 0L
        if (hasintercept) {
            avx <- colMeans(model.matrix(object))
            termsconst <- sum(avx * cf) #NA coefs?
            D <- sweep(D, 2, avx)
        }
        pred0 <- matrix(ncol = length(ll), nrow = NROW(D))
        colnames(pred0) <- ll
        if (se.fit) {
            A <- chol2inv(A)
            se.pred0 <- pred0
        }
        for (i in seq(length.out = length(ll))){
            ind <- aa == which(attr(tt, "term.labels") == ll[i])
            pred0[, i] <- D[, ind, drop = FALSE] %*% cf[ind]
            if (se.fit) {
                se.pred0[, i] <-  sqrt(diag(D[, ind] %*%
                                            tcrossprod(A[ind, ind], D[, ind])))
            }
        }
        if (hasintercept) attr(pred0, "constant") <- termsconst
        if (se.fit) return(list(fit = pred0, se.fit = se.pred0))
        return(pred0)
    }
    if (0 %in% level) {
        pred0 <- napredict(na.action, c(D %*% cf) + offset)
        if (type == "response")
            pred0 <- family(object)$linkinv(pred0)
        if (se.fit == TRUE) {
            na.act <- attr(na.exclude(pred0), "na.action")
            H <- backsolve(A, t(na.exclude(D)), transpose = TRUE)
            ## se.pred0 <- 
            ## sqrt(diag(D %*% chol2inv(K)[1:ncol(D), 1:ncol(D)] %*% t(D)))
            se.pred0 <- napredict(na.action, 
                                  napredict(na.act, sqrt(colSums(H^2))))
           if (type == "response")
               se.pred0 <- se.pred0*abs(family(object)$mu.eta(pred0))
            pred0 <- list(fit = pred0, se.fit = se.pred0)
        }
        if (identical(level, 0)) return(pred0)
    }

    r <- nrow(D)
    ## newrandom should give new design matrix for original random effects
    if (!is.null(newdata)){
        if(is.null(newrandom))
            stop("newdata specified without newrandom")
        if (!is.null(na.action))
            newrandom <- newrandom[-na.action, , drop = FALSE]
    }
    if (!identical(dim(newrandom), c(r, ncol(object$random))))
        stop("newrandom should have ", r, " rows and ",
             ncol(object$random), " columns")
    D <- cbind(D, newrandom)
    cf <- c(cf, attr(coef(object), "random"))
    pred <- napredict(na.action, c(D %*% cf) + offset)
    if (type == "response")
        pred <- family(object)$linkinv(pred)
    if (se.fit == TRUE) {
        ##se.pred <- sqrt(diag(D %*% chol2inv(K) %*% t(D)))
        na.act <- attr(na.exclude(pred), "na.action")
        H <- backsolve(K, t(na.exclude(D)), transpose = TRUE)
        se.pred <- napredict(na.action, napredict(na.act, sqrt(colSums(H^2))))
        if (type == "response")
            se.pred <- se.pred*abs(family(object)$mu.eta(pred))
        pred <- list(fit = pred, se.fit = se.pred)
    }

    if (0 %in% level)
        list(population = pred0, individual = pred)
    else pred
}