#' Random Effects Reliability
#'
#' @description These functions provide information about the reliability of
#' group-level estimates (i.e., random effects) in mixed models. They are useful
#' to assess whether the predictors yield consistent group-level variability.
#' "Group-level" can refer, for instance, to different participants in a study,
#' and the predictors to the effect of some experimental condition.
#'
#' Two conceptually related functions are implemented:
#' `performance_reliability()`, based on Rouder & Mehrvarz (2024), which uses
#' estimated model variances, and `performance_dvour()` (d-vour), which
#' corresponds to the Variability-Over-Uncertainty Ratio ("vour") between random
#' effects coefficient variability and their associated uncertainty.
#'
#' **Note**: `performance_reliability()` requires refitting the model to
#' estimate some of the variances of interest, which does not make it very
#' usable with Bayesian models. Please get in touch if you would like to
#' help address this.
#'
#' @param x A model object.
#' @param ... Currently not used.
#'
#' @details
#'
#' ## Reliability (Signal-to-Noise Ratio)
#'
#' `performance_reliability()` estimates the reliability of random effects
#' (intercepts and slopes) in mixed-effects models using variance decomposition.
#' This method follows the **hierarchical modeling** framework of Rouder &
#' Mehrvarz (2024), defining reliability as the **signal-to-noise variance
#' ratio**:
#'
#' \deqn{\gamma^2 = \frac{\sigma_B^2}{\sigma_B^2 + \sigma_W^2}}
#'
#' where:
#' - \eqn{\sigma_B^2} is the **between-subject variance** (i.e., variability
#' across groups).
#' - \eqn{\sigma_W^2} is the **within-subject variance** (i.e., trial-level
#' measurement noise).
#'
#' This metric quantifies **how much observed variability is due to actual
#' differences between groups**, rather than measurement error or within-group
#' fluctuations.
#'
#' To account for **trial count (\eqn{L})**, reliability is adjusted following:
#'
#' \deqn{E(r) = \frac{\gamma^2}{\gamma^2 + 1/L}}
#'
#' where \eqn{L} is the number of **observations per random effect level** (note
#' that Rouder (2024) recommends 2/L to adjust for contrast effects).
#'
#' ## Variability-Over-Uncertainty Ratio (d-vour)
#'
#' `performance_dvour()` computes an alternative reliability measure corresponding
#' to the normalized **ratio of observed variability to uncertainty in random effect
#' estimates**. This is defined as:
#'
#' \deqn{\text{D-vour} = \frac{\sigma_B^2}{\sigma_B^2 + \mu_{\text{SE}}^2}}
#'
#' where:
#' - \eqn{\sigma_B^2} is the **between-group variability** (computed as the SD
#' of the random effect estimates).
#' - \eqn{\mu_{\text{SE}}^2} is the **mean squared uncertainty** in random
#' effect estimates (i.e., the average uncertainty).
#'
#' ### Interpretation:
#'
#' - **D-vour > 0.75**: Strong group-level effects (between-group variance is at
#' least 3 times greater than uncertainty).
#' - **D-vour ~ 0.5**: Within-group and between-group variability are similar;
#' random effect estimates should be used with caution.
#' - **D-vour < 0.5**: Measurement noise dominates; random effect estimates are
#' probably unreliable.
#'
#' While d-vour shares some similarity with Rouder's Reliability, it does not
#' explicitly model within-group trial-level noise and is based only on the
#' random effect estimates. It can thus be inaccurate when there are few
#' random factor groups (the reliability of this index - the meta-reliability -
#' depends on the number of groups).
#'
#' @references
#' - Rouder, J. N., Pena, A. L., Mehrvarz, M., & Vandekerckhove, J. (2024). On
#' Cronbach’s merger: Why experiments may not be suitable for measuring
#' individual differences.
#' - Rouder, J. N., & Mehrvarz, M. (2024). Hierarchical-model insights for
#' planning and interpreting individual-difference studies of cognitive
#' abilities. Current Directions in Psychological Science, 33(2), 128-135.
#' - Williams, D. R., Mulder, J., Rouder, J. N., & Rast, P. (2021). Beneath the
#' surface: Unearthing within-person variability and mean relations with
#' Bayesian mixed models. Psychological methods, 26(1), 74.
#' - Williams, D. R., Martin, S. R., DeBolt, M., Oakes, L., & Rast, P. (2020). A
#' fine-tooth comb for measurement reliability: Predicting true score and
#' error variance in hierarchical models.
#'
#' @examplesIf all(insight::check_if_installed(c("lme4", "glmmTMB"), quietly = TRUE))
#' url <- "https://raw.githubusercontent.com/easystats/circus/refs/heads/main/data/illusiongame.csv"
#' df <- read.csv(url)
#'
#' m <- lme4::lmer(RT ~ (1 | Participant), data = df)
#' performance_reliability(m)
#' performance_dvour(m)
#'
#' m <- glmmTMB::glmmTMB(RT ~ (1 | Participant), data = df)
#' performance_reliability(m)
#' performance_dvour(m)
#'
#' m <- lme4::lmer(RT ~ (1 | Participant) + (1 | Trial), data = df)
#' performance_reliability(m)
#' performance_dvour(m)
#'
#' m <- glmmTMB::glmmTMB(RT ~ (1 | Participant) + (1 | Trial), data = df)
#' performance_reliability(m)
#' performance_dvour(m)
#'
#' \donttest{
#' m <- lme4::lmer(
#' RT ~ Illusion_Difference + (Illusion_Difference | Participant) + (1 | Trial),
#' data = df
#' )
#' performance_reliability(m)
#' performance_dvour(m)
#'
#' m <- glmmTMB::glmmTMB(
#' RT ~ Illusion_Difference + (Illusion_Difference | Participant) + (1 | Trial),
#' data = df
#' )
#' performance_reliability(m)
#' performance_dvour(m)
#' }
#' @export
performance_reliability <- function(x, ...) {
  # S3 generic: dispatch on the class of `x`; the `.default` method handles
  # frequentist and Bayesian mixed models supported by `insight`/`parameters`
  UseMethod("performance_reliability")
}
#' @export
performance_reliability.default <- function(x, ...) {
  # Sanity check: this method only makes sense for regression model objects
  if (!insight::is_model(x)) {
    insight::format_error("`x` must be a regression model object.")
  }

  # Number of observations (trials) per level of each random factor. We take
  # the *minimum* group size so that L is conservative for unbalanced designs.
  random <- lapply(insight::get_random(x), function(z) min(table(z)))

  # Extract variance components (between-group intercept/slope variances and
  # the residual, i.e. within-group, variance)
  v <- insight::get_variance(x)

  original_params <- parameters::parameters(x, effects = "grouplevel", verbose = FALSE)
  params <- as.data.frame(original_params)

  # Bayesian model? If yes, copy clean parameter names so they can be matched
  # against the names used by `insight::get_variance()`
  if (isTRUE(insight::model_info(x)$is_bayesian)) {
    if (inherits(x, "brmsfit")) {
      params$Parameter <- gsub("(.*)\\[(.*),(.*)\\]", "\\3", params$Parameter)
    } else if (inherits(x, c("stanreg", "stanfit"))) {
      params$Parameter <- gsub("b\\[(.*)\\s(.*)", "\\1", params$Parameter)
    }
  }

  reliability <- data.frame()
  for (grp in unique(params$Group)) {
    for (param in unique(params$Parameter)) {
      # Store group-level results
      rez <- data.frame(
        Group = grp,
        Parameter = param
      )

      # Based on Rouder's (2024) paper https://journals.sagepub.com/doi/10.1177/09637214231220923
      # "What part of reliability is invariant to trial size? Consider the ratio
      # sigma_B^2 / sigma_W^2. This is a signal-to-noise variance ratio - it is
      # how much more variable people are relative to trial noise. Let gamma2
      # denote this ratio. With it, the reliability coefficient follows (eq. 1):
      # E(r) = gamma2 / (gamma2 + 2/L)" (or 1/L for non-contrast tasks, see
      # annotation 4)

      # Number of trials per group
      L <- random[[grp]]

      # Extract the between-group variance for this parameter
      if (param %in% c("(Intercept)", "Intercept")) {
        var_between <- v$var.intercept[grp]
      } else {
        var_between <- v$var.slope[paste0(grp, ".", param)]
      }

      # Non-adjusted index
      # rez$Reliability <- var_between / (var_between + v$var.residual)

      # Adjusted index: with gamma2 = var_between / var_within, rewriting
      # E(r) = gamma2 / (gamma2 + 1/L) by multiplying numerator and
      # denominator by var_within gives
      #   E(r) = var_between / (var_between + var_within / L).
      # (Rouder & Mehrvarz suggest 1/L for non-contrast and 2/L for
      # contrast tasks.)
      rez$Reliability <- var_between / (var_between + v$var.residual / L)

      # The parameter gamma is the signal-to-noise standard-deviation ratio.
      # It is often convenient for communication as standard deviations are
      # sometimes more convenient than variances.
      # rez$Reliability_adjusted <- sqrt(rez$Reliability_adjusted)

      reliability <- rbind(reliability, rez)
    }
  }

  # Drop parameter/group combinations with no matching variance component
  reliability[!is.na(reliability$Reliability), ]
}
# d-vour ------------------------------------------------------------------
#' @rdname performance_reliability
#' @export
performance_dvour <- function(x, ...) {
  # S3 generic: dispatch on the class of `x`; methods exist for model objects
  # (`.default`) and for the output of `modelbased::estimate_grouplevel()`
  UseMethod("performance_dvour")
}
#' @export
performance_dvour.default <- function(x, ...) {
  # `estimate_grouplevel()` gained the features we rely on in modelbased 0.10.0
  insight::check_if_installed("modelbased", minimum_version = "0.10.0")
  # Extract the group-level estimates, then delegate to the dedicated method
  grouplevel <- modelbased::estimate_grouplevel(x, ...)
  performance_dvour(grouplevel, ...)
}
#' @export
performance_dvour.estimate_grouplevel <- function(x, ...) {
  # Name of the column holding the group-level point estimates
  coefname <- attributes(x)$coef_name
  # Column(s) holding an index of uncertainty/dispersion of each estimate
  dispname <- grep("SE|SD|MAD", colnames(x), value = TRUE)

  # Sanity checks
  if (insight::n_unique(x$Level) <= 3) {
    insight::format_alert(paste0(
      "The number of random effects group levels (N=",
      insight::n_unique(x$Level),
      ") might be too low to reliably estimate the variability."
    ))
  }

  if (length(dispname) == 0) {
    # NOTE: the message previously pointed users to `check_reliability()`,
    # which is not the exported name of this function.
    insight::format_error(paste0(
      "This function requires an index of variability of each random ",
      "effect (e.g., SE) but none was found. Try running `performance_dvour()` on the ",
      "output of `modelbased::estimate_grouplevel(model)`, and make sure the latter ",
      "returns a table with an index of dispersion."
    ))
  }

  if (length(dispname) > 1) {
    insight::format_alert(paste0(
      "Multiple indices of variability were found (",
      toString(dispname),
      "). Using the first one."
    ))
    dispname <- dispname[1]
  }

  # Compute reliability. Insert a placeholder component so the loop below
  # works uniformly whether or not the input has a "Component" column.
  if (!"Component" %in% names(x)) {
    x$Component <- "TEMP"
  }

  reliability <- data.frame()
  for (comp in unique(x$Component)) {
    for (grp in unique(x$Group)) {
      for (param in unique(x$Parameter)) {
        d <- x[x$Component == comp & x$Group == grp & x$Parameter == param, ]
        # Skip component/group/parameter combinations absent from the input
        if (nrow(d) == 0) {
          next
        }

        # Store group-level results
        rez <- data.frame(
          Component = comp,
          Group = grp,
          Parameter = param
        )

        # Variability-Over-Uncertainty Ratio (d-vour)
        # This index is based on the information contained in the group-level
        # estimates: SD of the estimates (variability) vs. their average
        # dispersion (uncertainty), combined on the variance scale.
        var_between <- stats::sd(d[[coefname]]) # Variability
        var_within <- mean(d[[dispname]]) # Average Uncertainty
        rez$D_vour <- var_between^2 / (var_between^2 + var_within^2)

        # Alternative 1: average of level-specific reliability
        # Inspired by the hlmer package (R version of HLM7 by Raudenbush et al., 2014)
        # rez$Dvour2 <- mean(d[[coefname]]^2 / (d[[coefname]]^2 + d[[dispname]]^2))

        reliability <- rbind(reliability, rez)
      }
    }
  }

  # Clean-up output: drop the placeholder component column if we added it
  if (
    insight::has_single_value(reliability$Component, remove_na = TRUE) &&
      unique(reliability$Component) == "TEMP"
  ) {
    reliability$Component <- NULL
  }

  reliability
}