1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
|
### Terrence D. Jorgensen
### Last updated: 12 March 2025
##' Random Allocation of Items to Parcels in a Structural Equation Model
##'
##' @description
##' This function generates a given number of randomly generated item-to-parcel
##' allocations, fits a model to each allocation, and provides averaged results
##' over all allocations.
##'
##' @details
##' This function implements the random item-to-parcel allocation procedure
##' described in Sterba (2011) and Sterba and MacCallum (2010). The function
##' takes a single data set with item-level data, randomly assigns items to
##' parcels, fits a structural equation model to the parceled data using
##' [lavaan::lavaanList()], and repeats this process for a user-specified
##' number of random allocations. Results from all fitted models are summarized
##' in the output. For further details on the benefits of randomly allocating
##' items to parcels, see Sterba (2011) and Sterba and MacCallum (2010).
##'
##' @importFrom stats sd qnorm
##' @importFrom lavaan parTable lavInspect lavaanList lavaanify lavNames
##'
##' @param model [lavaan::lavaan()] model syntax specifying the model
##' fit to (at least some) parceled data. Note that there can be a mixture of
##' items and parcels (even within the same factor), in case certain items
##' should never be parceled. Can be a character string or parameter table.
##' Also see [lavaan::lavaanify()] for more details.
##' @param data A `data.frame` containing all observed variables appearing
##' in the `model`, as well as those in the `item.syntax` used to
##' create parcels. If the data have missing values, multiple imputation
##' before parceling is recommended: submit a stacked data set (with a variable
##' for the imputation number, so they can be separateed later) and set
##' `do.fit = FALSE` to return the list of `data.frame`s (one per
##' allocation), each of which is a stacked, imputed data set with parcels.
##' @param parcel.names `character` vector containing names of all parcels
##' appearing as indicators in `model`.
##' @param item.syntax [lavaan::model.syntax()] specifying the model
##' that would be fit to all of the unparceled items, including items that
##' should be randomly allocated to parcels appearing in `model`.
##' @param nAlloc The number of random items-to-parcels allocations to generate.
##' @param fun `character` string indicating the name of the
##' [lavaan::lavaan()] function used to fit `model` to
##' `data`. Can only take the values `"lavaan"`, `"sem"`,
##' `"cfa"`, or `"growth"`.
##' @param alpha Alpha level used as criterion for significance.
##' @param fit.measures `character` vector containing names of fit measures
##' to request from each fitted [lavaan::lavaan()] model. See the
##' output of [lavaan::fitMeasures()] for a list of available measures.
##' @param \dots Additional arguments to be passed to
##' [lavaan::lavaanList()]. See also [lavaan::lavOptions()]
##' @param show.progress If `TRUE`, show a [utils::txtProgressBar()]
##' indicating how fast the model-fitting iterates over allocations.
##' @param iseed (Optional) Random seed used for parceling items. When the same
##' random seed is specified and the program is re-run, the same allocations
##' will be generated. Using the same `iseed` argument will ensure the
##' any model is fit to the same parcel allocations. *Note*: When using
##' \pkg{parallel} options, you must first type `RNGkind("L'Ecuyer-CMRG")`
##' into the R Console, so that the seed will be controlled across cores.
##' @param do.fit If `TRUE` (default), the `model` is fitted to each
##' parceled data set, and the summary of results is returned (see the Value
##' section below). If `FALSE`, the items are randomly parceled, but the
##' model is not fit; instead, the `list` of `data.frame`s is
##' returned (so assign it to an object).
##' @param return.fit If `TRUE`, a [lavaan::lavaanList-class] object
##' is returned with the `list` of results across allocations
##' @param warn Whether to print warnings when fitting `model` to each allocation
##'
##' @return
##' \item{Estimates}{A `data.frame` containing results related to
##' parameter estimates with columns corresponding to their names; average
##' and standard deviation across allocations; minimum, maximum, and range
##' across allocations; and the proportion of allocations in which each
##' parameter estimate was significant.}
##' \item{SE}{A `data.frame` containing results similar to
##' `Estimates`, but related to the standard errors of parameter
##' estimates.}
##' \item{Fit}{A `data.frame` containing results related to model fit,
##' with columns corresponding to fit index names; their average and
##' standard deviation across allocations; the minimum, maximum, and range
##' across allocations; and (if the test statistic or RMSEA is included in
##' `fit.measures`) the proportion of allocations in which each
##' test of (exact or close) fit was significant.}
##' \item{Model}{A [lavaan::lavaanList-class] object containing results
##' of the `model` fitted to each parcel allocation. Only returned if
##' `return.fit = TRUE`.}
##'
##' @author
##' Terrence D. Jorgensen (University of Amsterdam; \email{TJorgensen314@@gmail.com})
##'
##' @seealso [PAVranking()] for comparing 2 models,
##' [poolMAlloc()] for choosing the number of allocations
##'
##' @references
##'
##' Sterba, S. K. (2011). Implications of parcel-allocation
##' variability for comparing fit of item-solutions and parcel-solutions.
##' *Structural Equation Modeling, 18*(4), 554--577.
##' \doi{10.1080/10705511.2011.607073}
##'
##' Sterba, S. K. & MacCallum, R. C. (2010). Variability in parameter estimates
##' and model fit across random allocations of items to parcels.
##' *Multivariate Behavioral Research, 45*(2), 322--358.
##' \doi{10.1080/00273171003680302}
##'
##' Sterba, S. K., & Rights, J. D. (2016). Accounting for parcel-allocation
##' variability in practice: Combining sources of uncertainty and choosing the
##' number of allocations. *Multivariate Behavioral Research, 51*(2--3),
##' 296--313. \doi{10.1080/00273171.2016.1144502}
##'
##' Sterba, S. K., & Rights, J. D. (2017). Effects of parceling on model
##' selection: Parcel-allocation variability in model ranking.
##' *Psychological Methods, 22*(1), 47--68. \doi{10.1037/met0000067}
##'
##' @examples
##'
##' ## Fit 2-factor CFA to simulated data. Each factor has 9 indicators.
##'
##' ## Specify the item-level model (if NO parcels were created)
##' item.syntax <- c(paste0("f1 =~ f1item", 1:9),
##' paste0("f2 =~ f2item", 1:9))
##' cat(item.syntax, sep = "\n")
##' ## Below, we reduce the size of this same model by
##' ## applying different parceling schemes
##'
##'
##' ## 3-indicator parcels
##' mod.parcels <- '
##' f1 =~ par1 + par2 + par3
##' f2 =~ par4 + par5 + par6
##' '
##' ## names of parcels
##' (parcel.names <- paste0("par", 1:6))
##'
##' \donttest{
##' ## override default random-number generator to use parallel options
##' RNGkind("L'Ecuyer-CMRG")
##'
##' parcelAllocation(mod.parcels, data = simParcel, nAlloc = 100,
##' parcel.names = parcel.names, item.syntax = item.syntax,
##' # parallel = "multicore", # parallel available in Mac/Linux
##' std.lv = TRUE) # any addition lavaan arguments
##'
##'
##'
##' ## POOL RESULTS by treating parcel allocations as multiple imputations
##' ## Details provided in Sterba & Rights (2016); see ?poolMAlloc.
##'
##' ## save list of data sets instead of fitting model yet
##' dataList <- parcelAllocation(mod.parcels, data = simParcel, nAlloc = 100,
##' parcel.names = parcel.names,
##' item.syntax = item.syntax,
##' do.fit = FALSE)
##' ## now fit the model to each data set
##' library(lavaan.mi)
##' fit.parcels <- cfa.mi(mod.parcels, data = dataList, std.lv = TRUE)
##' summary(fit.parcels) # pooled using Rubin's rules
##' anova(fit.parcels) # pooled test statistic
##' help(package = "lavaan.mi") # find more methods for pooling results
##' }
##'
##'
##' ## multigroup example
##' simParcel$group <- 0:1 # arbitrary groups for example
##' mod.mg <- '
##' f1 =~ par1 + c(L2, L2)*par2 + par3
##' f2 =~ par4 + par5 + par6
##' '
##' ## names of parcels
##' (parcel.names <- paste0("par", 1:6))
##'
##' parcelAllocation(mod.mg, data = simParcel, parcel.names, item.syntax,
##' std.lv = TRUE, group = "group", group.equal = "loadings",
##' nAlloc = 20, show.progress = TRUE)
##'
##'
##'
##' ## parcels for first factor, items for second factor
##' mod.items <- '
##' f1 =~ par1 + par2 + par3
##' f2 =~ f2item2 + f2item7 + f2item8
##' '
##' ## names of parcels
##' (parcel.names <- paste0("par", 1:3))
##'
##' parcelAllocation(mod.items, data = simParcel, parcel.names, item.syntax,
##' nAlloc = 20, std.lv = TRUE)
##'
##'
##'
##' ## mixture of 1- and 3-indicator parcels for second factor
##' mod.mix <- '
##' f1 =~ par1 + par2 + par3
##' f2 =~ f2item2 + f2item7 + f2item8 + par4 + par5 + par6
##' '
##' ## names of parcels
##' (parcel.names <- paste0("par", 1:6))
##'
##' parcelAllocation(mod.mix, data = simParcel, parcel.names, item.syntax,
##' nAlloc = 20, std.lv = TRUE)
##'
##' @export
parcelAllocation <- function(model, data, parcel.names, item.syntax,
nAlloc = 100, fun = "sem", alpha = .05,
fit.measures = c("chisq","df","cfi",
"tli","rmsea","srmr"), ...,
show.progress = FALSE, iseed = 12345,
do.fit = TRUE, return.fit = FALSE, warn = FALSE) {
if (nAlloc < 2) stop("Minimum of two allocations required.")
if (!fun %in% c("sem","cfa","growth","lavaan"))
stop("'fun' argument must be either 'lavaan', 'cfa', 'sem', or 'growth'")
lavArgs <- list(...)
lavArgs$model <- item.syntax
lavArgs$data <- data
lavArgs$do.fit <- FALSE
## fit item-level model to data
item.fit <- do.call(fun, lavArgs)
item.PT <- parTable(item.fit)
## construct parameter table for parcel-level model
if (is.character(model)) {
## default lavaanify arguments
ptArgs <- formals(lavaanify)
## arguments passed to lavaan by user
fitArgs <- lavInspect(item.fit, "call")[-1]
## overwrite defaults with user's values
sameArgs <- intersect(names(ptArgs), names(fitArgs))
ptArgs[sameArgs] <- fitArgs[sameArgs]
ptArgs$model <- model
if (is.null(ptArgs$model.type)) ptArgs$model.type <- "sem"
if (ptArgs$model.type != "growth") ptArgs$model.type <- "sem"
ptArgs$ngroups <- lavInspect(item.fit, "ngroups")
PT <- do.call("lavaanify", ptArgs)
} else if (is.data.frame(model)) {
PT <- model
} else stop("'model' argument must be a character string of lavaan model",
" syntax or a lavaan parameter table. See ?lavaanify help page.")
## check that both models specify the same factors
## NOTE: lv.names remains unaltered, to omit latent indicators from $items
factorNames <- lv.names <- lavNames(PT, type = "lv")
if (!all(sort(lavNames(item.PT, type = "lv")) == sort(factorNames))) {
stop("'model' and 'item.syntax' arguments specify different factors.\n",
"'model' specifies: ", paste(sort(factorNames), collapse = ", "), "\n",
"'item.syntax' specifies: ", paste(sort(lavNames(item.PT,
type = "lv")),
collapse = ", "))
}
## for each factor, assign item sets to parcel sets
assignments <- list()
for (i in factorNames) {
## all indicators from parcel-level model
parcels <- PT$rhs[PT$lhs == i & PT$op == "=~"]
## all indicators from item-level model
items <- item.PT$rhs[item.PT$lhs == i & item.PT$op == "=~"]
## exclude observed indicators from parceling scheme if specified
## in parcel-level model
assignments[[i]]$parcels <- setdiff(parcels, c(names(data), lv.names))
assignments[[i]]$items <- setdiff(items , c( parcels , lv.names))
## Does this factor have parcels? If not, omit this factor from next loop
if (length(assignments[[i]]$parcels) == 0L) {
factorNames <- factorNames[-which(factorNames == i)]
next
}
## how many items per parcel?
nItems <- length(assignments[[i]]$items)
nParcels <- length(assignments[[i]]$parcels)
assignments[[i]]$nPerParcel <- rep(nItems %/% nParcels, nParcels)
if (nItems %% nParcels > 0) for (j in 1:(nItems %% nParcels)) {
assignments[[i]]$nPerParcel[j] <- assignments[[i]]$nPerParcel[j] + 1
}
names(assignments[[i]]$nPerParcel) <- assignments[[i]]$parcels
}
## for each allocation, create parcels from items
dataList <- list()
for (i in 1:nAlloc) {
dataList[[i]] <- data
for (j in factorNames) {
## create a random assignment pattern
ranAss <- sample(rep(names(assignments[[j]]$nPerParcel),
times = assignments[[j]]$nPerParcel))
## add each parcel to a copy of the original data set
for (k in assignments[[j]]$parcels) {
## which items were selected for this parcel?
ranVars <- assignments[[j]]$items[ranAss == k]
## calculate row means of those items, save as parcel
dataList[[i]][ , k] <- rowMeans(data[ , ranVars])
}
}
}
if (!do.fit) return(dataList)
## fit parcel-level model to list of data sets
set.seed(iseed) # in case not using parallel
fitList <- lavaanList(model, dataList, cmd = fun, ..., warn = warn, iseed = iseed,
FUN = lavaan::fitMeasures, show.progress = show.progress)
## for which data sets did the model converge?
conv <- fitList@meta$ok
if (!any(conv)) stop("The model did not converge for any allocations.")
if (!all(conv)) message("The model did not converge for the following ",
"allocations: ", paste(which(!conv), collapse = ", "))
## tools to extract output
getOutput <- function(x, sig = FALSE) {
c(Avg = mean(x, na.rm = TRUE), SD = sd(x, na.rm = TRUE),
Min = min(x, na.rm = TRUE), Max = max(x, na.rm = TRUE),
Range = max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}
out <- list()
myCols <- c("lhs","op","rhs","group", "block","label")
template <- data.frame(fitList@ParTableList[[which(conv)[1]]][myCols])
## parameter estimates
Est <- sapply(fitList@ParTableList[conv], function(x) x$est)
out$Estimates <- cbind(template, t(apply(Est, 1, getOutput)))
## standard errors
SE <- sapply(fitList@ParTableList[conv], function(x) x$se)
## Any for which SE could not be calculated?
missingSE <- apply(SE, 2, function(x) any(is.na(x)))
if (!all(missingSE)) {
if (any(missingSE)) message("Standard errors could not be computed for ",
"the following allocations: ",
paste(which(missingSE), collapse = ", "))
out$SE <- cbind(template, t(apply(SE[ , !missingSE], 1, getOutput)))
## add significance test results to $Estimates
Sig <- abs(Est[, !missingSE] / SE[, !missingSE]) > qnorm(alpha / 2,
lower.tail = FALSE)
out$Estimates$Percent_Sig <- rowMeans(Sig)
out$Estimates$Percent_Sig[fitList@ParTableList[[which(conv)[1]]]$free == 0L] <- NA
} else {
message("Standard errors could not be calculated for any converged",
" data sets, so no significance tests could be conducted.")
out$SE <- NULL
}
## fit measures
Fit <- do.call(cbind, fitList@funList[conv])[fit.measures, ]
out$Fit <- data.frame(t(apply(Fit, 1, getOutput)))
if (any(grepl(pattern = "chisq", fit.measures))) {
out$Fit$Percent_Sig <- NA
if ("chisq" %in% fit.measures) {
pvalues <- sapply(fitList@funList[conv], "[", i = "pvalue")
out$Fit["chisq", "Percent_Sig"] <- mean(pvalues < alpha, na.rm = TRUE)
}
if ("chisq.scaled" %in% fit.measures) {
pvalues <- sapply(fitList@funList[conv], "[", i = "pvalue.scaled")
out$Fit["chisq.scaled", "Percent_Sig"] <- mean(pvalues < alpha, na.rm = TRUE)
}
}
if (any(grepl(pattern = "rmsea", fit.measures))) {
if (is.null(out$Fit$Percent_Sig)) out$Fit$Percent_Sig <- NA
if ("rmsea" %in% fit.measures) {
pvalues <- sapply(fitList@funList[conv], "[", i = "rmsea.pvalue")
out$Fit["rmsea", "Percent_Sig"] <- mean(pvalues < alpha, na.rm = TRUE)
}
if ("rmsea.scaled" %in% fit.measures) {
pvalues <- sapply(fitList@funList[conv], "[", i = "rmsea.pvalue.scaled")
out$Fit["rmsea.scaled", "Percent_Sig"] <- mean(pvalues < alpha, na.rm = TRUE)
}
}
## check for robust test
if (any(grepl(pattern = "scaled", names(fitList@funList[conv][[1]]))) &
!any(grepl(pattern = "scaled", fit.measures))) {
warning('Robust test requested, but "fit.measures" argument does not',
' include any scaled measures (e.g., "chisq.scaled", ',
'"rmsea.scaled", or "rmsea.robust").')
}
## remove rows that do not correspond to estimates
out$Estimates <- out$Estimates[fitList@ParTableList[[which(conv)[1]]]$group > 0L, ]
if (!is.null(out$SE)) out$SE <- out$SE[fitList@ParTableList[[which(conv)[1]]]$group > 0L, ]
## assign class for lavaan's print method
class(out$Estimates) <- c("lavaan.data.frame","data.frame")
if (!is.null(out$SE)) class(out$SE) <- c("lavaan.data.frame","data.frame")
class(out$Fit) <- c("lavaan.data.frame","data.frame")
## return output
if (return.fit) {
out$Model <- fitList
out$Model@external$dataList <- dataList
}
out
}
|