File: glmmPQL.R

package info (click to toggle)
r-cran-bradleyterry2 1.1-2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 1,208 kB
  • sloc: sh: 13; makefile: 5
file content (215 lines) | stat: -rwxr-xr-x 10,803 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#' PQL Estimation of Generalized Linear Mixed Models
#' 
#' Fits GLMMs with simple random effects structure via Breslow and Clayton's
#' PQL algorithm.
#' The GLMM is assumed to be of the form \ifelse{html}{\out{g(<b>&mu;</b>) =
#' <b>X&beta;</b> + <b>Ze</b>}}{\deqn{g(\boldsymbol{\mu}) = \boldsymbol{X\beta}
#' + \boldsymbol{Ze}}{ g(mu) = X * beta + Z * e}} where \eqn{g} is the link
#' function, \ifelse{html}{\out{<b>&mu;</b>}}{\eqn{\boldsymbol{\mu}}{mu}} is the
#' vector of means and \ifelse{html}{\out{<b>X</b>, <b>Z</b>}}{\eqn{\boldsymbol{X},
#' \boldsymbol{Z}}{X,Z}} are design matrices for the fixed effects
#' \ifelse{html}{\out{<b>&beta;</b>}}{\eqn{\boldsymbol{\beta}}{beta}} and random
#' effects \ifelse{html}{\out{<b>e</b>}}{\eqn{\boldsymbol{e}}{e}} respectively.
#' Furthermore the random effects are assumed to be i.i.d.
#' \ifelse{html}{\out{N(0, &sigma;<sup>2</sup>)}}{\eqn{N(0, \sigma^2)}{
#' N(0, sigma^2)}}.
#' 
#' @param fixed a formula for the fixed effects.
#' @param random a design matrix for the random effects, with number of rows
#' equal to the length of variables in `formula`.
#' @param family a description of the error distribution and link function to
#' be used in the model. This can be a character string naming a family
#' function, a family function or the result of a call to a family function.
#' (See [family()] for details of family functions.)
#' @param data an optional data frame, list or environment (or object coercible
#' by [as.data.frame()] to a data frame) containing the variables in
#' the model.  If not found in `data`, the variables are taken from
#' `environment(formula)`, typically the environment from which
#' `glmmPQL` called.
#' @param subset an optional logical or numeric vector specifying a subset of
#' observations to be used in the fitting process.
#' @param weights an optional vector of \sQuote{prior weights} to be used in
#' the fitting process.
#' @param offset an optional numeric vector to be added to the linear predictor
#' during fitting. One or more `offset` terms can be included in the
#' formula instead or as well, and if more than one is specified their sum is
#' used.  See [model.offset()].
#' @param na.action a function which indicates what should happen when the data
#' contain `NA`s.  The default is set by the `na.action` setting of
#' [options()], and is [na.fail()] if that is unset.
#' @param start starting values for the parameters in the linear predictor.
#' @param etastart starting values for the linear predictor.
#' @param mustart starting values for the vector of means.
#' @param control a list of parameters for controlling the fitting process.
#' See the [glmmPQL.control()] for details.
#' @param sigma a starting value for the standard deviation of the random
#' effects.
#' @param sigma.fixed logical: whether or not the standard deviation of the
#' random effects should be fixed at its starting value.
#' @param model logical: whether or not the model frame should be returned.
#' @param x logical: whether or not the design matrix for the fixed effects
#' should be returned.
#' @param contrasts an optional list. See the `contrasts.arg` argument of
#' [model.matrix()].
#' @param \dots arguments to be passed to [glmmPQL.control()].
#' @return An object of class `"BTglmmPQL"` which inherits from
#' `"glm"` and `"lm"`: \item{coefficients}{ a named vector of
#' coefficients, with a `"random"` attribute giving the estimated random
#' effects.} \item{residuals}{ the working residuals from the final iteration
#' of the IWLS loop.} \item{random}{the design matrix for the random effects.}
#' \item{fitted.values}{ the fitted mean values, obtained by transforming the
#' linear predictors by the inverse of the link function.} \item{rank}{the
#' numeric rank of the fitted linear model.} \item{family}{the `family`
#' object used.} \item{linear.predictors}{the linear fit on link scale.}
#' \item{deviance}{up to a constant, minus twice the maximized log-likelihood.}
#' \item{aic}{a version of Akaike's *An Information Criterion*, minus
#' twice the maximized log-likelihood plus twice the number of parameters,
#' computed by the `aic` component of the family.}
#' \item{null.deviance}{the deviance for the null model, comparable with
#' `deviance`.} \item{iter}{the numer of iterations of the PQL algorithm.}
#' \item{weights}{the working weights, that is the weights in the final
#' iteration of the IWLS loop.} \item{prior.weights}{the weights initially
#' supplied, a vector of `1`'s if none were.} \item{df.residual}{the
#' residual degrees of freedom.} \item{df.null}{the residual degrees of freedom
#' for the null model.} \item{y}{if requested (the default) the `y` vector
#' used. (It is a vector even for a binomial model.)} \item{x}{if requested,
#' the model matrix.} \item{model}{if requested (the default), the model
#' frame.} \item{converged}{logical. Was the PQL algorithm judged to have
#' converged?} \item{call}{the matched call.} \item{formula}{the formula
#' supplied.} \item{terms}{the `terms` object used.} \item{data}{the
#' `data` argument used.} \item{offset}{the offset vector used.}
#' \item{control}{the value of the `control` argument used.}
#' \item{contrasts}{(where relevant) the contrasts used.} \item{xlevels}{(where
#' relevant) a record of the levels of the factors used in fitting.}
#' \item{na.action}{(where relevant) information returned by `model.frame`
#' on the special handling of `NA`s.} \item{sigma}{the estimated standard
#' deviation of the random effects} \item{sigma.fixed}{logical: whether or not
#' `sigma` was fixed} \item{varFix}{the variance-covariance matrix of the
#' fixed effects} \item{varSigma}{the variance of `sigma`}
#' @author Heather Turner
#' @seealso
#' [predict.BTglmmPQL()],[glmmPQL.control()],[BTm()]
#' @references Breslow, N. E. and Clayton, D. G. (1993) Approximate inference
#' in Generalized Linear Mixed Models. *Journal of the American
#' Statistical Association* **88**(421), 9--25.
#' 
#' Harville, D. A. (1977) Maximum likelihood approaches to variance component
#' estimation and to related problems. *Journal of the American
#' Statistical Association* **72**(358), 320--338.
#' @keywords models
#' @examples
#' 
#' ###############################################
#' ## Crowder seeds example from Breslow & Clayton
#' ###############################################
#' 
#' summary(glmmPQL(cbind(r, n - r) ~ seed + extract,
#'         random = diag(nrow(seeds)),
#'         family = "binomial", data = seeds))
#' 
#' summary(glmmPQL(cbind(r, n - r) ~ seed*extract,
#'                 random = diag(nrow(seeds)),
#'                 family = "binomial", data = seeds))
#' 
#' @importFrom stats gaussian .getXlevels glm.control is.empty.model glm.control glm.fit model.frame model.matrix model.offset model.response model.weights optimize terms
#' @export
glmmPQL <- function(fixed, random = NULL, family = "binomial", data = NULL,
                    subset = NULL, weights = NULL, offset = NULL,
                    na.action = NULL,  start = NULL, etastart = NULL,
                    mustart = NULL, control = glmmPQL.control(...),
                    sigma = 0.1, sigma.fixed = FALSE, model = TRUE,
                    x = FALSE, contrasts = NULL, ...) {
    call <-  match.call()
    nm <- names(call)[-1]

    if (is.null(random)) {
        keep <- is.element(nm, c("family", "data", "subset", "weights",
                                 "offset", "na.action"))
        for (i in nm[!keep]) call[[i]] <- NULL
        call$formula <- fixed
        environment(call$formula) <- environment(fixed)
        call[[1]] <- as.name("glm")
        return(eval.parent(call))
    }

    modelTerms <- terms(fixed, data = data)
    modelCall <- as.list(match.call(expand.dots = FALSE))
    argPos <- match(c("data", "subset", "na.action", "weights", "offset"),
                    names(modelCall), 0)
    modelData <- as.call(c(model.frame, list(formula = modelTerms,
                                             drop.unused.levels = TRUE),
                           modelCall[argPos]))
    modelData <- eval(modelData, parent.frame())
    
    if (!is.matrix(random) || nrow(random) != nrow(modelData)) {
        stop("`random` should be a matrix object, with ", nrow(modelData), 
             " rows.")
    }

    if (!is.null(modelCall$subset))
        Z <- random[eval(modelCall$subset, data, parent.frame()),]
    else Z <- random

    if (!is.null(attr(modelData, "na.action")))
        Z <- Z[-attr(modelData, "na.action"),]

    nObs <- nrow(modelData)
    y <- model.response(modelData, "numeric")
    if (is.null(y))
        y <- rep(0, nObs)
    weights <- as.vector(model.weights(modelData))
    if (!is.null(weights) && any(weights < 0))
        stop("negative weights are not allowed")
    if (is.null(weights))
        weights <- rep.int(1, nObs)
    offset <- as.vector(model.offset(modelData))
    if (is.null(offset))
        offset <- rep.int(0, nObs)

    if (is.character(family))
        family <- get(family, mode = "function", envir = parent.frame())
    if (is.function(family))
        family <- family()
    if (is.null(family$family)) {
        print(family)
        stop("`family' not recognized")
    }
    if (family$family == "binomial") {
        if (is.factor(y) && NCOL(y) == 1)
            y <- y != levels(y)[1]
        else if (NCOL(y) == 2) {
            n <- y[, 1] + y[, 2]
            y <- ifelse(n == 0, 0, y[, 1]/n)
            weights <- weights * n
        }
    }

    ## Use GLM to estimate fixed effects
    empty <- is.empty.model(modelTerms)
    if (!empty)
        X <- model.matrix(formula(modelTerms), data = modelData, contrasts)
    else
        X <- matrix(, nObs, 0)
    fit <- glmmPQL.fit(X = X, y = y, Z = Z, weights = weights, start = start,
                       etastart = etastart, mustart = mustart, offset = offset,
                       family = family, control = control, sigma = sigma,
                       sigma.fixed = sigma.fixed, ...)
    if (sum(offset) && attr(modelTerms, "intercept") > 0) {
        fit$null.deviance <- glm.fit(x = X[, "(Intercept)", drop = FALSE],
            y = y, weights = weights, offset = offset, family = family,
            control = glm.control(), intercept = TRUE)$deviance
    }
    if (model)
        fit$model <- modelData
    fit$na.action <- attr(modelData, "na.action")
    if (x)
        fit$x <- X
    fit <- c(fit, list(call = call, formula = fixed, random = random,
                       terms = modelTerms,
                       data = data, offset = offset, control = control,
                       method = "glmmPQL.fit", contrasts = attr(X, "contrasts"),
                       xlevels = .getXlevels(modelTerms, modelData)))
    class(fit) <- c("BTglmmPQL", "glm", "lm")
    fit
}