1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
|
#' Check suitability of data for Factor Analysis (FA) with Bartlett's Test of Sphericity and KMO
#'
#' This checks whether the data is appropriate for Factor Analysis (FA) by
#' running the Bartlett's Test of Sphericity and the Kaiser, Meyer, Olkin (KMO)
#' Measure of Sampling Adequacy (MSA). See **details** below for more information
#' about the interpretation and meaning of each test.
#'
#'
#' @details
#' ### Bartlett's Test of Sphericity
#'
#' Bartlett's (1951) test of sphericity tests whether a matrix (of correlations)
#' is significantly different from an identity matrix (ones on the diagonal,
#' zeros everywhere else). It tests whether the correlation coefficients between
#' different variables are all 0. The test computes the
#' probability that the correlation matrix has significant correlations among at
#' least some of the variables in a dataset, a prerequisite for factor analysis
#' to work.
#'
#' While it is often suggested to check whether Bartlett's test of sphericity is
#' significant before starting with factor analysis, one needs to remember that
#' the test is testing a pretty extreme scenario (that all correlations are zero).
#' As the sample size increases, this test tends to be always significant, which
#' makes it not particularly useful or informative in well-powered studies.
#'
#' ### Kaiser, Meyer, Olkin (KMO)
#'
#' *(Measure of Sampling Adequacy (MSA) for Factor Analysis.)*
#'
#' Kaiser (1970) introduced a Measure of Sampling Adequacy (MSA), later modified
#' by Kaiser and Rice (1974). The Kaiser-Meyer-Olkin (KMO) statistic, which can
#' vary from 0 to 1, indicates the degree to which each variable in a set is
#' predicted without error by the other variables.
#'
#' A value of 0 indicates that the sum of partial correlations is large relative
#' to the sum of correlations, indicating factor analysis is likely to be
#' inappropriate. A KMO value close to 1 indicates that the sum of partial
#' correlations is not large relative to the sum of correlations. It means that
#' patterns of correlations are relatively compact, and so factor analysis
#' should yield distinct and reliable factors. Values smaller than 0.5 suggest
#' that you should either collect more data or rethink which variables to
#' include.
#'
#' Kaiser (1974) suggested that KMO > .9 were marvelous, in the .80s,
#' meritorious, in the .70s, middling, in the .60s, mediocre, in the .50s,
#' miserable, and less than .5, unacceptable. Hair et al. (2006) suggest
#' accepting a value > 0.5. Values between 0.5 and 0.7 are mediocre, and values
#' between 0.7 and 0.8 are good.
#'
#' Variables with individual KMO values below 0.5 could be considered for
#' exclusion from the analysis (note that you would need to re-compute the
#' KMO indices as they are dependent on the whole dataset).
#'
#' @param x A data frame or a correlation matrix. If the latter is passed, `n`
#' must be provided.
#' @param n If a correlation matrix was passed, the number of observations must
#' be specified.
#' @param ... Arguments passed to or from other methods.
#'
#'
#' @examples
#' library(performance)
#'
#' check_factorstructure(mtcars)
#'
#' # One can also pass a correlation matrix
#' r <- cor(mtcars)
#' check_factorstructure(r, n = nrow(mtcars))
#'
#' @return A list of lists of indices related to sphericity and KMO.
#' @seealso [`check_clusterstructure()`].
#'
#' @references
#' This function is based on the `KMO()` and the `cortest.bartlett()`
#' functions in the **psych** package (Revelle, 2016).
#'
#' - Revelle, W. (2016). How To: Use the psych package for Factor Analysis
#' and data reduction.
#'
#' - Bartlett, M. S. (1951). The effect of standardization on a Chi-square
#' approximation in factor analysis. Biometrika, 38(3/4), 337-344.
#' - Kaiser, H. F. (1970). A second generation little jiffy.
#' Psychometrika, 35(4), 401-415.
#'
#' - Kaiser, H. F., & Rice, J. (1974). Little jiffy, mark IV. Educational
#' and psychological measurement, 34(1), 111-117.
#'
#' - Kaiser, H. F. (1974). An index of factorial simplicity.
#' Psychometrika, 39(1), 31-36.
#'
#' @export
check_factorstructure <- function(x, n = NULL, ...) {
  # Runs the KMO check and Bartlett's sphericity check on the same input and
  # bundles both results, plus a combined text/colour verdict, into a single
  # `easystats_check` object.
  # TODO: detect (and remove?) factors
  kmo <- check_kmo(x, n, ...)
  sphericity <- check_sphericity_bartlett(x, n, ...)

  # One bullet per test, reusing the message each check attached to its result.
  combined_text <- paste0(
    "\n - Sphericity: ", attr(sphericity, "text"),
    "\n - KMO: ", attr(kmo, "text")
  )

  # The overall verdict is "red" as soon as either individual check is "red".
  any_red <- attr(kmo, "color") == "red" || attr(sphericity, "color") == "red"
  overall_color <- if (any_red) "red" else "green"

  structure(
    list(KMO = kmo, sphericity = sphericity),
    text = combined_text,
    color = overall_color,
    title = "Is the data suitable for Factor Analysis?",
    class = c("easystats_check", "list")
  )
}
#' @rdname check_factorstructure
#' @export
check_kmo <- function(x, n = NULL, ...) {
  # Computes the overall and per-variable Measure of Sampling Adequacy (MSA)
  # from the correlation matrix returned by `.validate_factor_structure()`.
  # Returns list(MSA, MSA_variable) of class `easystats_check`, carrying
  # "text", "color" and "title" attributes.
  validated <- .validate_factor_structure(x, n, ...)
  r_mat <- validated$r

  # Invert the correlation matrix and rescale the inverse with cov2cor().
  q_mat <- stats::cov2cor(solve(r_mat))

  # Only off-diagonal entries enter the sums below.
  diag(q_mat) <- 0
  diag(r_mat) <- 0

  r_sq <- r_mat^2
  q_sq <- q_mat^2
  total_r_sq <- sum(r_sq)
  overall_msa <- total_r_sq / (total_r_sq + sum(q_sq))
  item_msa <- colSums(r_sq) / (colSums(r_sq) + colSums(q_sq))

  # TODO: add interpret_kmo in effectsize and use that here for more fine-grained interpretation
  # 0.5 is the cut-off between "inappropriate" and "appropriate".
  if (overall_msa < 0.5) {
    template <- "The Kaiser, Meyer, Olkin (KMO) overall measure of sampling adequacy suggests that factor analysis is likely to be inappropriate (KMO = %.2f)." # nolint
    color <- "red"
  } else {
    template <- "The Kaiser, Meyer, Olkin (KMO) overall measure of sampling adequacy suggests that data seems appropriate for factor analysis (KMO = %.2f)." # nolint
    color <- "green"
  }
  msg_text <- sprintf(template, overall_msa)

  # Individual scores, formatted as "name (value)"; values below 0.5 get a "*".
  low_flag <- ifelse(item_msa < 0.5, "*", "")
  item_labels <- paste0(
    names(item_msa), " (", insight::format_value(item_msa), low_flag, ")"
  )
  msg_text <- paste0(
    msg_text,
    " The individual KMO scores are: ",
    paste(item_labels, collapse = ", "),
    "."
  )

  structure(
    list(MSA = overall_msa, MSA_variable = item_msa),
    text = msg_text,
    color = color,
    title = "KMO Measure of Sampling Adequacy",
    class = c("easystats_check", "list")
  )
}
#' @rdname check_factorstructure
#' @export
check_sphericity_bartlett <- function(x, n = NULL, ...) {
  # Bartlett's test of sphericity on the correlation matrix returned by
  # `.validate_factor_structure()`. Returns list(chisq, p, dof) of class
  # `easystats_check`, carrying "text", "color" and "title" attributes.
  validated <- .validate_factor_structure(x, n, ...)
  n_vars <- ncol(validated$r)

  # Test statistic: -log(det(R)) scaled by a term in the number of
  # observations and the number of variables.
  scaling <- validated$n - 1 - (2 * n_vars + 5) / 6
  statistic <- -log(det(validated$r)) * scaling
  dof <- n_vars * (n_vars - 1) / 2
  pval <- stats::pchisq(statistic, df = dof, lower.tail = FALSE)

  # The check passes ("green") only when p < 0.001.
  if (pval < 0.001) {
    template <- "Bartlett's test of sphericity suggests that there is sufficient significant correlation in the data for factor analysis (Chisq(%i) = %.2f, %s)." # nolint
    color <- "green"
  } else {
    template <- "Bartlett's test of sphericity suggests that there is not enough significant correlation in the data for factor analysis (Chisq(%i) = %.2f, %s)." # nolint
    color <- "red"
  }
  msg_text <- sprintf(template, dof, statistic, insight::format_p(pval))

  structure(
    list(chisq = statistic, p = pval, dof = dof),
    text = msg_text,
    color = color,
    title = "Test of Sphericity",
    class = c("easystats_check", "list")
  )
}
# Helpers -----------------------------------------------------------------
#' @keywords internal
.validate_factor_structure <- function(x, n = NULL, ...) {
  # Normalises the input of the factor-structure checks into a correlation
  # matrix `r` plus the number of observations `n`.
  #
  # x:   raw data (when `n` is NULL) or a correlation matrix (when `n` is
  #      given).
  # n:   number of observations; NULL means "take it from the rows of `x`".
  # ...: forwarded to stats::cor() when the correlations are computed here.
  #
  # Returns list(n = , r = ). Raises an error if `r` is not square.
  if (is.null(n)) {
    r <- stats::cor(x, use = "pairwise.complete.obs", ...)
    n <- nrow(x)
  } else {
    # A correlation table may arrive as a data frame. Coerce it to a plain
    # matrix so that matrix-only functions applied to `r` later (e.g. det(),
    # which has no data-frame method) work instead of failing on dispatch.
    r <- as.matrix(x)
  }

  if (nrow(r) != ncol(r)) {
    insight::format_error("The correlation matrix is not square.")
  }

  list(n = n, r = r)
}
|