File: merExpectedRank.R

package info (click to toggle)
r-cran-mertools 0.6.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,716 kB
  • sloc: sh: 13; makefile: 2
file content (146 lines) | stat: -rw-r--r-- 6,868 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#' Calculate the expected rank of random coefficients that account for
#' uncertainty.
#'
#' \code{expectedRank} calculates the expected rank and the percentile expected
#' rank of any random term in a merMod object.  A simple ranking of the estimated
#' random effects (as produced by \code{\link[lme4]{ranef}}) is not satisfactory
#' because it ignores any amount of uncertainty.
#'
#' Inspired by Lingsma et al. (2010, see also Laird and Louis 1989),
#' expectedRank sums the probability that each level of the grouping factor is
#' greater than every other level of the grouping factor, similar to a
#' two-sample t-test.
#'
#' The formula for the expected rank is:
#' \deqn{ExpectedRank_i = 1 + \sum \phi((\theta_i - \theta_k) / \sqrt(var(\theta_i)+var(\theta_k))}
#' where \eqn{\phi} is the standard normal distribution function, \eqn{\theta}
#' is the estimated random effect and \eqn{var(\theta)} is the posterior
#' variance of the estimated random effect. We add one to the sum so that the
#' minimum rank is one instead of zero so that in the case where there is no
#' overlap between the variances of the random effects (or if the variances are
#' zero), the expected rank equals the actual rank.  The ranks are ordered such
#' that the winners have ranks that are greater than the losers.
#'
#' The formula for the percentile expected rank is:
#' \deqn{100 * (ExpectedRank_i - 0.5) / N_grps}
#' where \eqn{N_grps} is the number of grouping factor levels. The percentile
#' expected rank can be interpreted as the fraction of levels that score at or
#' below the given level.
#'
#' NOTE: \code{expectedRank} will only work under conditions that \code{lme4::ranef}
#' will work. One current example of when this is \emph{not} the case is for
#' models when there are multiple terms specified per factor (e.g. uncorrelated random
#' coefficients for the same term, e.g.
#' \code{lmer(Reaction ~ Days + (1 | Subject) + (0 + Days | Subject), data = sleepstudy)})
#'
#' @param merMod An object of class merMod
#'
#' @param groupFctr An optional character vector specifying the name(s) the grouping factor(s)
#'   over which the random coefficient of interest varies.  This is the
#'   variable to the right of the pipe, \code{|}, in the [g]lmer formula.
#'   This parameter is optional. If none is specified all terms will be returned.
#'
#' @param term An optional character vector specifying the name(s) of the random coefficient of interest. This is the
#'   variable to the left of the pipe, \code{|}, in the [g]lmer formula. Partial
#'   matching is attempted on the intercept term so the following character
#'   strings will all return rankings based on the intercept (\emph{provided that
#'   they do not match the name of another random coefficient for that factor}):
#'   \code{c("(Intercept)", "Int", "intercep", ...)}.
#'
#' @return A data.frame with the following five columns:
#'   \describe{
#'     \item{groupFctr}{a character representing name of the grouping factor}
#'     \item{groupLevel}{a character representing the level of the grouping factor}
#'     \item{term}{a character representing the formula term for the group}
#'     \item{estimate}{effect estimate from \code{lme4::ranef(, condVar=TRUE)}).}
#'     \item{std.error}{the posterior variance of the estimate random effect
#'                      (from \code{lme4::ranef(, condVar=TRUE)}); named "\code{term}"_var.}
#'     \item{ER}{The expected rank.}
#'     \item{pctER}{The percentile expected rank.}
#'   }
#'
#' @references
#' Laird NM and Louis TA. Empirical Bayes Ranking Methods. \emph{Journal of
#' Education Statistics}. 1989;14(1)29-46. Available at
#' \url{http://www.jstor.org/stable/1164724}.
#'
#'
#' Lingsma HF, Steyerberg EW, Eijkemans MJC, et al. Comparing and
#' ranking hospitals based on outcome: results from The Netherlands Stroke Survey.
#' \emph{QJM: An International Journal of Medicine}. 2010;103(2):99-108.
#' doi:10.1093/qjmed/hcp169
#'
#' @examples
#' \donttest{
#' #For a one-level random intercept model
#' m1 <- lmer(Reaction ~ Days + (1 | Subject), sleepstudy)
#' (m1.er <- expectedRank(m1))
#'
#' #For a one-level random intercept model with multiple random terms
#' m2 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
#' #ranked by the random slope on Days
#' (m2.er1 <- expectedRank(m2, term="Days"))
#' #ranked by the random intercept
#' (m2.er2 <- expectedRank(m2, term="int"))
#'
#' #For a two-level model with random intercepts
#' m3 <- lmer(y ~ service * dept + (1|s) + (1|d), InstEval)
#' #Ranked by the random intercept on 's'
#' (m3.er1 <- expectedRank(m3, groupFctr="s", term="Intercept"))
#' }
#' @export
expectedRank <- function(merMod, groupFctr=NULL, term=NULL) {
  #Count random terms in merMod
  n.rfx <- lme4::getME(merMod, "k")
  n.rfac <- lme4::getME(merMod, "n_rfac")
  rfx <- lme4::ranef(merMod, condVar=TRUE)

  if(!is.null(groupFctr)){
    groupFctr <- groupFctr
  } else{
    groupFctr <- names(rfx)
  }
  out <- data.frame(groupFctr = NA, groupLevel = NA, term = NA,
                    estimate = NA, std.error = NA,
                    ER = NA, pctER = NA)

  for(i in groupFctr){
      rfx.names <- rownames(rfx[[i]])
      n.grps <- length(rfx.names)
      n.terms <- length(rfx[[i]])
      if(!is.null(term)){
        termIdx <- term
      } else{
        termIdx <- names(rfx[[i]])
      }
      for(j in termIdx){
        if (all(grepl("[iI]nt[a-z]*", j)) & is.na(match(j, names(rfx[[i]])))) {
           j <- "(Intercept)"
        }

        term.idx <- grep(j, names(rfx[[i]]), fixed=TRUE)
        theta <- rfx[[i]][,term.idx]
        var.theta <- attr(rfx[[i]], which="postVar")[term.idx, term.idx, 1:n.grps]
        #Calculate Expected Rank which is the sum of the probabilities that group i is greater than all
        #other groups j (assuming normal distribution of random effects)
        ER <- pctER <- rep(NA, n.grps)
        for (k in 1:n.grps) {
          ER[k] <- 1 + sum(pnorm((theta[k]-theta[-k]) / sqrt(var.theta[k] + var.theta[-k])))
        }
        #Calculated percentile expected rank ... the version of the formula I am using is
        #the percentage of groups that are ranked **equal to or less than** the selected
        #group ... if we just wanted percentage ranked less than then remove the 0.5
        pctER <- round(100 * (ER - 0.5) / n.grps)

        tmp <- data.frame(groupFctr = i, groupLevel = rfx.names, term = j,
                          estimate = theta, std.error = var.theta,
                          ER = ER, pctER = pctER)
        out <- rbind(out, tmp)
      }
  }

  out <- out[-1, ]
  # Avoid parentheses in parameter names
  out$term <- gsub("(Intercept)", "Intercept", out$term, fixed = TRUE)
  return(out)
}