% Sweave vignette: combining multiple imputations with the mitools package
\documentclass[12pt]{article}
\title{Combining multiple imputations}
\author{Thomas Lumley}
\SweaveOpts{echo=TRUE}
%\VignetteIndexEntry{smi}{Combining multiple imputations}
\begin{document}
\maketitle
<<echo=FALSE>>=
## keep printed R output within 70 characters per line
options(width = 70)
@
Carlin \emph{et al.} (2003) illustrate the use of their Stata tools for
multiple imputations with data from a cohort study of adolescent
health. Five sets of imputations were done, separately for male and
female participants. The resulting datasets are in \texttt{mitools/dta}.
First we read all the datasets into R, using \texttt{read.dta} from the \texttt{foreign} package.
<<>>=
library(mitools)
library(foreign)
## directory of example Stata files installed with mitools
data.dir <- system.file("dta", package = "mitools")
## read in data: every file matching the pattern is read with read.dta and
## the resulting list is wrapped as an imputationList.
## full.names is spelled out rather than relying on partial matching of `full`.
women <- imputationList(lapply(
  list.files(data.dir, pattern = "f.\\.dta", full.names = TRUE),
  read.dta,
  warn.missing.labels = FALSE
))
men <- imputationList(lapply(
  list.files(data.dir, pattern = "m.\\.dta", full.names = TRUE),
  read.dta,
  warn.missing.labels = FALSE
))
@
We now combine the imputations for men and women, first defining a \texttt{sex} variable:
<<>>=
## give each set of imputations a sex indicator: 0 for women, 1 for men
women <- update(women, sex = 0)
men <- update(men, sex = 1)
## stack the women's and men's imputations into one object
all <- rbind(women, men)
all
colnames(all)
@
Now tabulate drinking frequency by sex
<<>>=
## cross-tabulate sex against drinking frequency (drkfre)
with(all, table(sex, drkfre))
@
and define a new `regular drinking' variable.
<<>>=
## drkreg is TRUE where the numeric code of drkfre is greater than 2
all <- update(all, drkreg = as.numeric(drkfre) > 2)
## tables
with(all, table(sex, drkreg))
@
We can now fit a logistic regression model for trends over time in drinking:
<<>>=
## logistic regression of drkreg on wave, sex and their interaction
model1 <- with(all, glm(drkreg ~ wave * sex, family = binomial()))
## combine the fitted models once, then print the result and its summary
combined <- MIcombine(model1)
combined
summary(combined)
@
For model objects with \texttt{coef} and \texttt{vcov} methods the
extraction of coefficients and variances is automatic, but \texttt{MIextract} can still be used:
<<>>=
## extract the coefficients and variance matrices explicitly
beta <- MIextract(model1, fun = coef)
vars <- MIextract(model1, fun = vcov)
## combine the extracted pieces and summarise the result
summary(MIcombine(beta, vars))
@
\subsection*{References}
Carlin JB, Li N, Greenwood P, Coffey C. (2003) Tools for analyzing multiply imputed datasets. \emph{Stata Journal} 3:1--20.
\end{document}
|