File: GenDavidson.R

package info (click to toggle)
r-cran-bradleyterry2 1.1-2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 1,208 kB
  • sloc: sh: 13; makefile: 5
file content (260 lines) | stat: -rwxr-xr-x 12,927 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
#' Specify a Generalised Davidson Term in a gnm Model Formula
#' 
#' GenDavidson is a function of class `"nonlin"` to specify a generalised
#' Davidson term in the formula argument to [gnm::gnm()], providing a
#' model for paired comparison data where ties are a possible outcome.
#' 
#' `GenDavidson` specifies a generalisation of the Davidson model (1970)
#' for paired comparisons where a tie is a possible outcome. It is designed for
#' modelling trinomial counts corresponding to the win/draw/loss outcome for
#' each contest, which are assumed Poisson conditional on the total count for
#' each match. Since this total must be one, the expected counts are
#' equivalently the probabilities for each possible outcome, which are modelled
#' on the log scale: \deqn{\log(p(i \textrm{beats} j)_k) = \theta_{ijk} +
#' \log(\mu\alpha_i}{log(p(i beats j)_k) = theta_{ijk} + log(mu * alpha_i)}
#' \deqn{\log(p(draw)_k) = \theta_{ijk} + \delta + c + }{ log(p(draw)_k) =
#' theta_{ijk} + log(delta) + c + sigma * (pi * log(mu * alpha_i) + (1 - pi) *
#' log(alpha_j)) + (1 - sigma) * log(mu * alpha_i + alpha_j) }\deqn{
#' \sigma(\pi\log(\mu\alpha_i) - (1 - \pi)log(\alpha_j)) + }{ log(p(draw)_k) =
#' theta_{ijk} + log(delta) + c + sigma * (pi * log(mu * alpha_i) + (1 - pi) *
#' log(alpha_j)) + (1 - sigma) * log(mu * alpha_i + alpha_j) }\deqn{ (1 -
#' \sigma)(\log(\mu\alpha_i + \alpha_j))}{ log(p(draw)_k) = theta_{ijk} +
#' log(delta) + c + sigma * (pi * log(mu * alpha_i) + (1 - pi) * log(alpha_j))
#' + (1 - sigma) * log(mu * alpha_i + alpha_j) } \deqn{\log(p(j \textrm{beats}
#' i)_k) = \theta_{ijk} + }{log(p(j beats i)_k) = theta_{ijk} +
#' log(alpha_j)}\deqn{ log(\alpha_j)}{log(p(j beats i)_k) = theta_{ijk} +
#' log(alpha_j)} Here \eqn{\theta_{ijk}}{theta_{ijk}} is a structural parameter
#' to fix the trinomial totals; \eqn{\mu}{mu} is the home advantage parameter;
#' \eqn{\alpha_i}{alpha_i} and \eqn{\alpha_j}{alpha_j} are the abilities of
#' players \eqn{i} and \eqn{j} respectively; \eqn{c}{c} is a function of the
#' parameters such that \eqn{\textrm{expit}(\delta)}{plogis(delta)} is the
#' maximum probability of a tie, \eqn{\sigma}{sigma} scales the dependence of
#' the probability of a tie on the relative abilities and \eqn{\pi}{pi} allows
#' for asymmetry in this dependence.
#' 
#' For parameters that must be positive (\eqn{\alpha_i, \sigma, \mu}{alpha,
#' sigma, mu}), the log is estimated, while for parameters that must be between
#' zero and one (\eqn{\delta, \pi}), the logit is estimated, as illustrated in
#' the example.
#' 
#' @param win a logical vector: `TRUE` if player1 wins, `FALSE`
#' otherwise.
#' @param tie a logical vector: `TRUE` if the outcome is a tie,
#' `FALSE` otherwise.
#' @param loss a logical vector: `TRUE` if player1 loses, `FALSE`
#' otherwise.
#' @param player1 an ID factor specifying the first player in each contest,
#' with the same set of levels as `player2`.
#' @param player2 an ID factor specifying the second player in each contest,
#' with the same set of levels as `player2`.
#' @param home.adv a formula for the parameter corresponding to the home
#' advantage effect. If `NULL`, no home advantage effect is estimated.
#' @param tie.max a formula for the parameter corresponding to the maximum tie
#' probability.
#' @param tie.scale a formula for the parameter corresponding to the scale of
#' dependence of the tie probability on the probability that `player1`
#' wins, given the outcome is not a draw.
#' @param tie.mode a formula for the parameter corresponding to the location of
#' maximum tie probability, in terms of the probability that `player1`
#' wins, given the outcome is not a draw.
#' @param at.home1 a logical vector: `TRUE` if `player1` is at home,
#' `FALSE` otherwise.
#' @param at.home2 a logical vector: `TRUE` if `player2` is at home,
#' `FALSE` otherwise.
#' @return A list with the anticipated components of a "nonlin" function:
#' \item{ predictors }{ the formulae for the different parameters and the ID
#' factors for player 1 and player 2. } \item{ variables }{ the outcome
#' variables and the \dQuote{at home} variables, if specified.  } \item{ common
#' }{ an index to specify that common effects are to be estimated for the
#' players. } \item{ term }{ a function to create a deparsed mathematical
#' expression of the term, given labels for the predictors.} \item{ start }{ a
#' function to generate starting values for the parameters.}
#' @author Heather Turner
#' @seealso [football()], [plotProportions()]
#' @references Davidson, R. R. (1970). On extending the Bradley-Terry model to
#' accommodate ties in paired comparison experiments. *Journal of the
#' American Statistical Association*, **65**, 317--328.
#' @keywords models nonlinear
#' @examples
#' 
#' ### example requires gnm
#' if (require(gnm)) {
#'     ### convert to trinomial counts
#'     football.tri <- expandCategorical(football, "result", idvar = "match")
#'     head(football.tri)
#' 
#'     ### add variable to indicate whether team playing at home
#'     football.tri$at.home <- !logical(nrow(football.tri))
#' 
#'     ### fit shifted & scaled Davidson model
#'     ###  - subset to first and last season for illustration
#'     shifScalDav <- gnm(count ~
#'         GenDavidson(result == 1, result == 0, result == -1,
#'                     home:season, away:season, home.adv = ~1,
#'                     tie.max = ~1, tie.scale = ~1, tie.mode = ~1,
#'                     at.home1 = at.home,
#'                     at.home2 = !at.home) - 1,
#'         eliminate = match, family = poisson, data = football.tri,
#'         subset = season %in% c("2008-9", "2012-13"))
#' 
#'     ### look at coefs
#'     coef <- coef(shifScalDav)
#'     ## home advantage
#'     exp(coef["home.adv"])
#'     ## max p(tie)
#'     plogis(coef["tie.max"])
#'     ## mode p(tie)
#'     plogis(coef["tie.mode"])
#'     ## scale relative to Davidson of dependence of p(tie) on p(win|not a draw)
#'     exp(coef["tie.scale"])
#' 
#'     ### check model fit
#'     alpha <- names(coef[-(1:4)])
#'     plotProportions(result == 1, result == 0, result == -1,
#'                     home:season, away:season,
#'                     abilities = coef[alpha], home.adv = coef["home.adv"],
#'                     tie.max = coef["tie.max"], tie.scale = coef["tie.scale"],
#'                     tie.mode = coef["tie.mode"],
#'                     at.home1 = at.home, at.home2 = !at.home,
#'                     data = football.tri, subset = count == 1)
#' }
#' 
#' ### analyse all five seasons
#' ### - takes a little while to run, particularly likelihood ratio tests
#' \dontrun{
#' ### fit Davidson model
#' Dav <- gnm(count ~ GenDavidson(result == 1, result == 0, result == -1,
#'                                home:season, away:season, home.adv = ~1,
#'                                tie.max = ~1,
#'                                at.home1 = at.home,
#'                                at.home2 = !at.home) - 1,
#'            eliminate = match, family = poisson, data = football.tri)
#' 
#' ### fit scaled Davidson model
#' scalDav <- gnm(count ~ GenDavidson(result == 1, result == 0, result == -1,
#'                                   home:season, away:season, home.adv = ~1,
#'                                   tie.max = ~1, tie.scale = ~1,
#'                                   at.home1 = at.home,
#'                                   at.home2 = !at.home) - 1,
#'                eliminate = match, family = poisson, data = football.tri)
#' 
#' ### fit shifted & scaled Davidson model
#' shifScalDav <- gnm(count ~
#'     GenDavidson(result == 1, result == 0, result == -1,
#'                 home:season, away:season, home.adv = ~1,
#'                 tie.max = ~1, tie.scale = ~1, tie.mode = ~1,
#'                 at.home1 = at.home,
#'                 at.home2 = !at.home) - 1,
#'     eliminate = match, family = poisson, data = football.tri)
#' 
#' ### compare models
#' anova(Dav, scalDav, shifScalDav, test = "Chisq")
#' 
#' ### diagnostic plots
#' main <- c("Davidson", "Scaled Davidson", "Shifted & Scaled Davidson")
#' mod <- list(Dav, scalDav, shifScalDav)
#' names(mod) <- main
#' 
#' ## use football.tri data so that at.home can be found,
#' ## but restrict to actual match results
#' par(mfrow = c(2,2))
#' for (i in 1:3) {
#'     coef <- parameters(mod[[i]])
#'     plotProportions(result == 1, result == 0, result == -1,
#'                     home:season, away:season,
#'                     abilities = coef[alpha],
#'                     home.adv = coef["home.adv"],
#'                     tie.max = coef["tie.max"],
#'                     tie.scale = coef["tie.scale"],
#'                     tie.mode = coef["tie.mode"],
#'                     at.home1 = at.home,
#'                     at.home2 = !at.home,
#'                     main = main[i],
#'                     data = football.tri, subset = count == 1)
#' }
#' }
#' 
#' @importFrom stats coef plogis runif
#' @export
GenDavidson <- function(win, # TRUE/FALSE
                        tie, # TRUE/FALSE
                        loss, # TRUE/FALSE
                        player1, # player1 in each contest
                        player2, # ditto player2
                        home.adv = NULL,
                        tie.max = ~1,
                        tie.mode = NULL,
                        tie.scale = NULL,
                        at.home1 = NULL,
                        at.home2 = NULL){
    call <- as.expression(sys.call()[c(1,5:6)])
    extra <- NULL
    if (is.null(tie.max)) stop("a formula must be specified for tie.max")
    if (!is.null(home.adv) & is.null(at.home1))
        stop("at.home1 and at.home2 must be specified")
    has.home.adv <- !is.null(home.adv)
    has.tie.mode <- !is.null(tie.mode)
    has.tie.scale <- !is.null(tie.scale)
    if (has.home.adv) extra <- c(extra, list(home.adv = home.adv))
    if (has.tie.mode) extra <- c(extra, list(tie.mode = tie.mode))
    if (has.tie.scale) extra <- c(extra, list(tie.scale = tie.scale))
    i <- has.home.adv + has.tie.mode + has.tie.scale
    a <- match("home.adv", names(extra), 1)
    b <- match("tie.mode", names(extra), 1)
    c <- match("tie.scale", names(extra), 1)
    adv <- has.home.adv | has.tie.mode
    list(predictors = {c(extra,
                         list(tie.max = tie.max,
                              substitute(player1), # player1 & 2 are homogeneous
                              substitute(player2)))},
         ## substitutes "result" for "outcome", but also substitutes all of 
         ## code vector
         variables = {c(list(loss = substitute(loss),
                             tie = substitute(tie),
                             win = substitute(win)),
                        list(at.home1 = substitute(at.home1),
                             at.home2 = substitute(at.home2))[adv])},
         common =  c(1[has.home.adv], 2[has.tie.mode], 3[has.tie.scale], 
                     4, 5, 5),
         term = function(predLabels, varLabels){
             if (has.home.adv) {
                 ability1 <- paste("(", predLabels[a], ") * ", varLabels[4],
                                   " + ", predLabels[i + 2], sep = "")
                 ability2 <- paste("(", predLabels[a], ") * ", varLabels[5],
                                   " + ", predLabels[i + 3], sep = "")
             }
             else {
                 ability1 <- predLabels[i + 2]
                 ability2 <- predLabels[i + 3]
             }
             tie.scale <- ifelse(has.tie.scale, predLabels[c], 0)
             scale <- paste("exp(", tie.scale, ")", sep = "")
             if (has.tie.mode) {
                 psi1 <- paste("exp((", predLabels[b], ") * ",  varLabels[4],
                               ")", sep = "")
                 psi2 <- paste("exp((", predLabels[b], ") * ",  varLabels[5],
                               ")", sep = "")
                 weight1 <- paste(psi1, "/(", psi1, " + ", psi2, ")", sep = "")
                 weight2 <- paste(psi2, "/(", psi1, " + ", psi2, ")", sep = "")
             }
             else {
                 weight1 <- weight2 <- "0.5"
             }
             nu <- paste(predLabels[i + 1], " - ", scale, " * (",
                         weight1, " * log(", weight1, ") + ",
                         weight2, " * log(", weight2, "))", sep = "")
             paste(varLabels[1], " * (", ability2, ") + ",
                   varLabels[2], " * (", nu, " + ",
                   scale, " * ", weight1, " * (", ability1, ") + ",
                   scale, " * ", weight2, " * (", ability2, ") + ",
                   "(1 - ", scale, ") * ",
                   "log(exp(", ability1, ") + exp(", ability2, "))) + ",
                   varLabels[3], " * (", ability1, ")", sep = "")
         },
         start = function(theta) {
             init <- runif(length(theta)) - 0.5
             init[c] <- 0.5
         }
         )
}
class(GenDavidson) <- "nonlin"