File: predict.BTm.R

package info (click to toggle)
r-cran-bradleyterry2 1.1-2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 1,208 kB
  • sloc: sh: 13; makefile: 5
file content (240 lines) | stat: -rwxr-xr-x 11,652 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#' Predict Method for Bradley-Terry Models
#' 
#' Obtain predictions and optionally standard errors of those predictions from
#' a fitted Bradley-Terry model.
#' 
#' If `newdata` is omitted the predictions are based on the data used for
#' the fit.  In that case how cases with missing values in the original fit are
#' treated is determined by the `na.action` argument of that fit.  If
#' `na.action = na.omit` omitted cases will not appear in the residuals,
#' whereas if `na.action = na.exclude` they will appear (in predictions
#' and standard errors), with residual value `NA`.  See also
#' `napredict`.
#' 
#' @param object a fitted object of class `"BTm"`
#' @param newdata (optional) a data frame in which to look for variables with
#' which to predict.  If omitted, the fitted linear predictors are used.
#' @param level for models with random effects: an integer vector giving the
#' level(s) at which predictions are required. Level zero corresponds to
#' population-level predictions (fixed effects only), whilst level one
#' corresponds to the player-level predictions (full model) which are NA for
#' contests involving players not in the original data. By default, `level = 0`
#' for a fixed effects model, `1` otherwise.
#' @param type the type of prediction required.  The default is on the scale of
#' the linear predictors; the alternative `"response"` is on the scale of
#' the response variable. Thus for a default Bradley-Terry model the default
#' predictions are of log-odds (probabilities on logit scale) and 
#' `type = "response"` gives the predicted probabilities. The `"terms"` option
#' returns a matrix giving the fitted values of each term in the model formula
#' on the linear predictor scale (fixed effects only).
#' @param se.fit logical switch indicating if standard errors are required.
#' @param dispersion a value for the dispersion, not used for models with
#' random effects. If omitted, that returned by `summary` applied to the
#' object is used, where applicable.
#' @param terms with `type ="terms"` by default all terms are returned.  A
#' character vector specifies which terms are to be returned.
#' @param na.action function determining what should be done with missing
#' values in `newdata`.  The default is to predict `NA`.
#' @param \dots further arguments passed to or from other methods.
#' @return If `se.fit = FALSE`, a vector or matrix of predictions.  If
#' `se = TRUE`, a list with components \item{fit }{Predictions}
#' \item{se.fit }{Estimated standard errors}
#' @author Heather Turner
#' @seealso [predict.glm()], [predict.glmmPQL()]
#' @keywords models
#' @examples
#' 
#' ## The final model in example(flatlizards)
#' result <- rep(1, nrow(flatlizards$contests))
#' Whiting.model3 <- BTm(1, winner, loser, ~ throat.PC1[..] + throat.PC3[..] +
#'                       head.length[..] + SVL[..] + (1|..),
#'                       family = binomial(link = "probit"),
#'                       data = flatlizards, trace = TRUE)
#' 
#' ## `new' data for contests between four of the original lizards
#' ## factor levels must correspond to original levels, but unused levels
#' ## can be dropped - levels must match rows of predictors
#' newdata  <- list(contests = data.frame(
#'                  winner = factor(c("lizard048", "lizard060"),
#'                  levels = c("lizard006", "lizard011", 
#'                             "lizard048", "lizard060")),
#'                  loser = factor(c("lizard006", "lizard011"),
#'                  levels = c("lizard006", "lizard011", 
#'                             "lizard048", "lizard060"))
#'                  ),
#'                  predictors = flatlizards$predictors[c(3, 6, 27, 33), ])
#' 
#' predict(Whiting.model3, level = 1, newdata = newdata)
#' 
#' ## same as
#' predict(Whiting.model3, level = 1)[1:2]
#' 
#' ## introducing a new lizard
#' newpred <- rbind(flatlizards$predictors[c(3, 6, 27),
#'                      c("throat.PC1","throat.PC3", "SVL", "head.length")],
#'                  c(-5, 1.5, 1, 0.1))
#' rownames(newpred)[4] <- "lizard059"
#' 
#' newdata  <- list(contests = data.frame(
#'                  winner = factor(c("lizard048", "lizard059"),
#'                  levels = c("lizard006", "lizard011", 
#'                             "lizard048", "lizard059")),
#'                  loser = factor(c("lizard006", "lizard011"),
#'                  levels = c("lizard006", "lizard011", 
#'                             "lizard048", "lizard059"))
#'                  ),
#'                  predictors = newpred)
#' 
#' ## can only predict at population level for contest with new lizard
#' predict(Whiting.model3, level = 0:1, se.fit = TRUE, newdata = newdata)
#' 
#' ## predicting at specific levels of covariates
#' 
#' ## consider a model from example(CEMS)
#' table6.model <-  BTm(outcome = cbind(win1.adj, win2.adj),
#'                      player1 = school1, player2 = school2,
#'                      formula = ~ .. +
#'                          WOR[student] * Paris[..] +
#'                          WOR[student] * Milano[..] +
#'                          WOR[student] * Barcelona[..] +
#'                          DEG[student] * St.Gallen[..] +
#'                          STUD[student] * Paris[..] +
#'                          STUD[student] * St.Gallen[..] +
#'                          ENG[student] * St.Gallen[..] +
#'                          FRA[student] * London[..] +
#'                          FRA[student] * Paris[..] +
#'                          SPA[student] * Barcelona[..] +
#'                          ITA[student] * London[..] +
#'                          ITA[student] * Milano[..] +
#'                          SEX[student] * Milano[..],
#'                      refcat = "Stockholm",
#'                      data = CEMS)
#'                      
#' ## estimate abilities for a combination not seen in the original data
#' 
#' ## same schools
#' schools <- levels(CEMS$preferences$school1)
#' ## new student data
#' students <- data.frame(STUD = "other", ENG = "good", FRA = "good", 
#'                        SPA = "good", ITA = "good", WOR = "yes", DEG = "no",
#'                        SEX = "female", stringsAsFactors = FALSE)
#' ## set levels to be the same as original data    
#' for (i in seq_len(ncol(students))){
#'     students[,i] <- factor(students[,i], levels(CEMS$students[,i]))
#' }
#' newdata <- list(preferences = 
#'     data.frame(student = factor(500), # new id matching with `students[1,]`
#'                school1 = factor("London", levels = schools),
#'                school2 = factor("Paris", levels = schools)),
#'     students = students,
#'     schools = CEMS$schools)
#' 
#' ## warning can be ignored as model specification was over-parameterized
#' predict(table6.model, newdata = newdata)
#' 
#' ## if treatment contrasts are use (i.e. one player is set as the reference
#' ## category), then predicting the outcome of contests against the reference
#' ## is equivalent to estimating abilities with specific covariate values
#' 
#' ## add student with all values at reference levels 
#' students <- rbind(students,
#'     data.frame(STUD = "other", ENG = "good", FRA = "good", 
#'                SPA = "good", ITA = "good", WOR = "no", DEG = "no",
#'                SEX = "female", stringsAsFactors = FALSE))
#' ## set levels to be the same as original data    
#' for (i in seq_len(ncol(students))){
#'     students[,i] <- factor(students[,i], levels(CEMS$students[,i]))
#' }
#' newdata <- list(preferences = 
#'     data.frame(student = factor(rep(c(500, 502), each = 6)), 
#'                school1 = factor(schools, levels = schools),
#'                school2 = factor("Stockholm", levels = schools)),
#'     students = students,
#'     schools = CEMS$schools)
#'     
#' predict(table6.model, newdata = newdata, se.fit = TRUE)
#' 
#' ## the second set of predictions (elements 7-12) are equivalent to the output 
#' ## of BTabilities; the first set are adjust for `WOR` being equal to "yes"
#' BTabilities(table6.model)
#' 
#' @importFrom stats model.matrix na.pass reformulate
#' @export
predict.BTm <- function (object, newdata = NULL, 
                         level = ifelse(is.null(object$random), 0, 1),
                         type = c("link", "response", "terms"), se.fit = FALSE,
                         dispersion = NULL, terms = NULL,
                         na.action = na.pass, ...) {
    type <- match.arg(type)
    if (!is.null(newdata)) {
        ## need to define X so will work with model terms
        setup <- match(c("player1", "player2", "formula", "id",
                        "separate.ability", "refcat", "weights",
                        "subset", "offset", "contrasts"), names(object$call),
                       0L)
        setup <- do.call(BTm.setup,
                         c(as.list(object$call)[setup], list(data = newdata)),
                         envir = environment(object$formula))
        nfix <- length(object$coefficients)
        newdata <- data.frame(matrix(, nrow(setup$X), 0))
        keep <- as.logical(match(colnames(setup$X), names(object$coefficients),
                                 nomatch = 0))
        if (any(!keep)){
            ## new players with missing data - set to NA
            missing <- rowSums(setup$X[,!keep, drop = FALSE]) != 0
            setup$X <- setup$X[, keep, drop = FALSE]
            setup$X[missing,] <- NA
        }
        if (ncol(setup$X) != nfix) {
            ## newdata does not include original players with missing data
            X <- matrix(0, nrow(setup$X), nfix,
                        dimnames = list(rownames(setup$X),
                        names(object$coefficients)))
            X[, colnames(setup$X)] <- setup$X
            newdata$X <- X
        }
        else newdata$X <- setup$X
        nran <- length(attr(object$coefficients, "random"))
        if (1 %in% level && !is.null(object$random) && type != "terms"){
            if (ncol(setup$random) != nran) {
                ## expand to give col for every random effect
                Z <- matrix(0, nrow(setup$random), nran,
                            dimnames = list(rownames(setup$random),
                            colnames(object$random))) #ranef need names!!
                ## set to NA for contests with new players 
                ## (with predictors present)
                miss <- !colnames(setup$random) %in% colnames(Z)
                Z[, colnames(setup$random)[!miss]] <- setup$random[,!miss]
                if (any(miss)) {
                    miss <- rowSums(setup$random[, miss, drop = FALSE] != 0) > 0
                    Z[miss,] <- NA
                }
                newrandom <- Z
            }
            else newrandom <- setup$random
            return(NextMethod(newrandom = newrandom))
        }
    }
    if (type == "terms") {
        object$x <- model.matrix(object)
        attr(object$x, "assign") <- object$assign
        id <- unique(object$assign)
        terms <- paste("X", id, sep = "")
        object$terms <- terms(reformulate(c(0, terms)))
        splitX <- function(X) {
            newdata <- data.frame(matrix(, nrow(X), 0))
            for (i in seq(id))
                newdata[terms[i]] <- X[,object$assign == id[i]]
            newdata
        }
        if (is.null(newdata)) newdata <- splitX(object$x)
        else newdata <- splitX(newdata$X)
        tmp <- NextMethod(newdata = newdata)
        #tmp$fit[tmp$se.fit == 0] <- NA
        tmp$se.fit[tmp$se.fit == 0] <- NA
        colnames(tmp$fit) <- colnames(tmp$se.fit) <-
            c("(separate)"[0 %in% id], object$term.labels)
        return(tmp)
    }
    else NextMethod()
}