% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fmi.R
\name{fmi}
\alias{fmi}
\title{Fraction of Missing Information}
\usage{
fmi(data, method = "saturated", group = NULL, ords = NULL,
varnames = NULL, exclude = NULL, return.fit = FALSE)
}
\arguments{
\item{data}{Either a single \code{data.frame} with incomplete observations,
or a \code{list} of imputed data sets.}
\item{method}{character. If \code{"saturated"} or \code{"sat"} (default),
the model used to estimate FMI is a freely estimated covariance matrix and
mean vector for numeric variables, and/or polychoric correlations and
thresholds for ordered categorical variables, for each group (if
applicable). If \code{"null"}, only means and variances are estimated for
numeric variables, and/or thresholds for ordered categorical variables
(i.e., covariances and/or polychoric/polyserial correlations are
constrained to zero). If \code{"cor"}, the \code{Covariances} element of
the returned list is replaced by \code{Correlations} (see \strong{Value}).
See \strong{Details} for more information.}
\item{group}{\code{character}. The optional name of a grouping variable, to
request FMI in each group.}
\item{ords}{Optional \code{character} vector naming ordered-categorical
variables, if they are not already stored as class \code{ordered} in \code{data}.}
\item{varnames}{Optional \code{character} vector of variable names, to calculate
FMI for a subset of variables in \code{data}. By default, all numeric and
\code{ordered} variables will be included, unless \code{data} is a single
incomplete \code{data.frame}, in which case only numeric variables can be
used with FIML estimation. Other variable types will be removed.}
\item{exclude}{Optional \code{character} vector naming variables to exclude from
the analysis.}
\item{return.fit}{logical. If \code{TRUE}, the fitted \link[lavaan:lavaan-class]{lavaan::lavaan} or
\link[lavaan.mi:lavaan.mi-class]{lavaan.mi::lavaan.mi} model is returned, so FMI can be found from
\code{summary(..., fmi=TRUE)}.}
}
\value{
\code{fmi()} returns a list with at least 2 of the following:
\item{Covariances}{A list of symmetric matrices: (1) the estimated/pooled
covariance matrix, or a list of group-specific matrices (if applicable)
and (2) a matrix of FMI, or a list of group-specific matrices (if
applicable). Only available if \code{method = "saturated"}. When
\code{method="cor"}, this element is replaced by \code{Correlations}.}
\item{Variances}{The estimated/pooled variance for each numeric variable.
Only available if \code{method = "null"} (otherwise, it is on the diagonal
of Covariances).}
\item{Means}{The estimated/pooled mean for each numeric variable.}
\item{Thresholds}{The estimated/pooled threshold(s) for each
ordered-categorical variable.}
}
\description{
This function estimates the Fraction of Missing Information (FMI) for
summary statistics of each variable, using either an incomplete data set or
a list of imputed data sets.
}
\details{
The function estimates a saturated model with \code{\link[lavaan:lavaan]{lavaan::lavaan()}} for a
single incomplete data set using FIML, or with \code{\link[lavaan.mi:lavaan.mi]{lavaan.mi::lavaan.mi()}}
for a list of imputed data sets. If \code{method = "saturated"}, FMI will be
estimated for all summary statistics, which could take a lot of time with
big data sets. If \code{method = "null"}, FMI will only be estimated for
univariate statistics (e.g., means, variances, thresholds). The saturated
model gives more reliable estimates, so it could also help to request a
subset of variables from a large data set.
}
\examples{
HSMiss <- HolzingerSwineford1939[ , c(paste("x", 1:9, sep = ""),
"ageyr","agemo","school")]
set.seed(12345)
HSMiss$x5 <- ifelse(HSMiss$x5 <= quantile(HSMiss$x5, .3), NA, HSMiss$x5)
age <- HSMiss$ageyr + HSMiss$agemo/12
HSMiss$x9 <- ifelse(age <= quantile(age, .3), NA, HSMiss$x9)
## calculate FMI (using FIML, provide partially observed data set)
(out1 <- fmi(HSMiss, exclude = "school"))
(out2 <- fmi(HSMiss, exclude = "school", method = "null"))
(out3 <- fmi(HSMiss, varnames = c("x5","x6","x7","x8","x9")))
(out4 <- fmi(HSMiss, method = "cor", group = "school")) # correlations by group
## significance tests in lavaan(.mi) object
out5 <- fmi(HSMiss, method = "cor", return.fit = TRUE)
summary(out5) # factor loading == SD, covariance = correlation
if(requireNamespace("lavaan.mi")){
## ordered-categorical data
data(binHS5imps, package = "lavaan.mi")
## calculate FMI, using list of imputed data sets
fmi(binHS5imps, group = "school")
}
}
\references{
Rubin, D. B. (1987). \emph{Multiple imputation for nonresponse in surveys}.
New York, NY: Wiley.

Savalei, V. & Rhemtulla, M. (2012). On obtaining estimates of the fraction
of missing information from full information maximum likelihood.
\emph{Structural Equation Modeling, 19}(3), 477--494.
\doi{10.1080/10705511.2012.687669}

Wagner, J. (2010). The fraction of missing information as a tool for
monitoring the quality of survey data. \emph{Public Opinion Quarterly,
74}(2), 223--243. \doi{10.1093/poq/nfq007}
}
\author{
Mauricio Garnier Villarreal (Vrije Universiteit Amsterdam; \email{m.garniervillarreal@vu.nl})

Terrence Jorgensen (University of Amsterdam; \email{TJorgensen314@gmail.com})
}