1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
|
#' Semi-Partial (Part) Correlation Squared (\eqn{\Delta R^2})
#'
#' Compute the semi-partial (part) correlation squared (also known as
#' \eqn{\Delta R^2}). Currently, only `lm()` models are supported.
#'
#' @aliases r2_delta r2_part
#'
#' @param model An `lm` model.
#' @param type Type, either `"terms"`, or `"parameters"`.
#' @param ... Arguments passed to or from other methods.
#' @inheritParams eta_squared
#'
#' @return A data frame with the effect size.
#'
#' @details
#' This is similar to the last column of the "Conditional Dominance Statistics"
#' section of the [parameters::dominance_analysis()] output. For each term, the
#' model is refit *without* the columns on the [model
#' matrix][stats::model.matrix] that correspond to that term. The \eqn{R^2} of
#' this *sub*-model is then subtracted from the \eqn{R^2} of the *full* model to
#' yield the \eqn{\Delta R^2}. (For `type = "parameters"`, this is done for each
#' column in the model matrix.)
#'
#' **Note** that this is unlike [parameters::dominance_analysis()], where term
#' deletion is done via the formula interface, and therefore may lead to
#' different results.
#'
#' For other, non-`lm()` models, as well as more verbose information and
#' options, please see the documentation for [parameters::dominance_analysis()].
#'
#' # Confidence (Compatibility) Intervals (CIs)
#' Confidence intervals are based on the normal approximation as provided by Alf
#' and Graf (1999). An adjustment to the lower bound of the CI is used, to
#' improve the coverage properties of the CIs, according to Algina et al (2008):
#' If the *F* test associated with the \eqn{sr^2} is significant (at `1-ci`
#' level), but the lower bound of the CI is 0, it is set to a small value
#' (arbitrarily to a 10th of the estimated \eqn{sr^2}); if the *F* test is not
#' significant, the lower bound is set to 0. (Additionally, lower and upper
#' bound are "fixed" so that they cannot be smaller than 0 or larger than 1.)
#'
#' @inheritSection effectsize_CIs CIs and Significance Tests
#'
#' @seealso [eta_squared()], [cohens_f()] for comparing two models,
#' [parameters::dominance_analysis()] and
#' [parameters::standardize_parameters()].
#'
#' @references
#' - Alf Jr, E. F., & Graf, R. G. (1999). Asymptotic confidence limits for the
#' difference between two squared multiple correlations: A simplified approach.
#' *Psychological Methods, 4*(1), 70-75. \doi{10.1037/1082-989X.4.1.70}
#' - Algina, J., Keselman, H. J., & Penfield, R. D. (2008). Confidence intervals
#' for the squared multiple semipartial correlation coefficient. *Journal of
#' Modern Applied Statistical Methods, 7*(1), 2-10. \doi{10.22237/jmasm/1209614460}
#'
#' @examples
#' data("hardlyworking")
#'
#' m <- lm(salary ~ factor(n_comps) + xtra_hours * seniority, data = hardlyworking)
#'
#' r2_semipartial(m)
#'
#' r2_semipartial(m, type = "parameters")
#'
#'
#'
#' # Compare to `eta_squared()`
#' # --------------------------
#' npk.aov <- lm(yield ~ N + P + K, npk)
#'
#' # When predictors are orthogonal,
#' # eta_squared(partial = FALSE) gives the same effect size:
#' performance::check_collinearity(npk.aov)
#'
#' eta_squared(npk.aov, partial = FALSE)
#'
#' r2_semipartial(npk.aov)
#'
#' @examplesIf interactive()
#' # Compare to `dominance_analysis()`
#' # ---------------------------------
#' m_full <- lm(salary ~ ., data = hardlyworking)
#'
#' r2_semipartial(m_full)
#'
#' # Compare to last column of "Conditional Dominance Statistics":
#' parameters::dominance_analysis(m_full)
#'
#' @export
r2_semipartial <- function(model, type = c("terms", "parameters"),
ci = 0.95, alternative = "greater",
...) {
UseMethod("r2_semipartial")
}
#' @export
r2_semipartial.lm <- function(model, type = c("terms", "parameters"),
ci = 0.95, alternative = "greater",
...) {
type <- match.arg(type)
alternative <- .match.alt(alternative, FALSE)
y <- stats::model.frame(model)[[1]]
mm <- insight::get_modelmatrix(model)
mterms <- stats::terms(model)
has_incpt <- insight::has_intercept(model)
if (type == "terms") {
out <- data.frame(Term = attr(mterms, "term.labels"))
idx <- attr(mm, "assign")
idx_sub <- idx[idx > 0]
} else {
Parameter <- colnames(mm)
out <- data.frame(Parameter = Parameter)
out <- subset(out, Parameter != "(Intercept)")
idx <- seq.int(ncol(mm))
idx_sub <- idx[Parameter != "(Intercept)"]
}
tot_mod <- stats::lm(stats::reformulate("mm", response = "y", intercept = has_incpt))
sub_mods <- lapply(unique(idx_sub), function(.i) {
f <- stats::reformulate("mm[, .i != idx]", response = "y", intercept = has_incpt)
stats::lm(f)
})
tot_r2 <- performance::r2(model)[[1]]
sub_r2 <- lapply(sub_mods, performance::r2)
sub_r2 <- sapply(sub_r2, "[[", 1)
out$r2_semipartial <- as.vector(tot_r2 - sub_r2)
if (.test_ci(ci)) {
out$CI <- ci
ci.level <- .adjust_ci(ci, alternative)
alpha <- 1 - ci.level
zc <- stats::qnorm(alpha / 2, lower.tail = FALSE)
N <- insight::n_obs(model)
SE <- .delta_r2_SE(sub_r2, tot_r2, N)
out$CI_low <- pmax(out$r2_semipartial - zc * SE, 0)
out$CI_high <- pmin(out$r2_semipartial + zc * SE, 1)
ci_method <- list(method = "normal")
# Fix lower bound according to sig
p_comps <- sapply(sub_mods, function(.mod) {
anova(tot_mod, .mod)[2, "Pr(>F)"]
})
is_sig <- p_comps < (1 - ci)
lb_is_zero <- out$CI_low == 0
if (any(!is_sig)) {
out$CI_low[!is_sig] <- 0
}
if (any(is_sig & lb_is_zero)) {
out$CI_low[is_sig & lb_is_zero] <- out[is_sig & lb_is_zero, 2] * 0.01
}
out <- .limit_ci(out, alternative, 0, 1)
} else {
ci_method <- alternative <- NULL
}
class(out) <- c("effectsize_table", class(out))
attr(out, "ci") <- ci
attr(out, "ci_method") <- ci_method
attr(out, "approximate") <- FALSE
attr(out, "alternative") <- alternative
return(out)
}
# Utils -------------------------------------------------------------------
#' @keywords internal
.delta_r2_V <- function(R2, N) {
(4 * R2 * ((1 - R2)^2)) / N
}
#' @keywords internal
.delta_r2_COV <- function(R2r, R2f, N) {
rhor <- sqrt(R2r)
rhof <- sqrt(R2f)
(4 * rhof * rhor * (0.5 * (2 * rhor / rhof - rhor * rhof) * (1 - R2f - R2r - R2r / R2f) + (rhor / rhof)^3)) / N
}
#' @keywords internal
.delta_r2_SE <- function(R2r, R2f, N) {
Vf <- .delta_r2_V(R2f, N)
Vr <- .delta_r2_V(R2r, N)
COVfr <- .delta_r2_COV(R2r, R2f, N)
as.vector(sqrt(Vf + Vr - 2 * COVfr))
}
|