1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
|
\name{MclustDA}
\alias{MclustDA}
\alias{print.MclustDA}
\title{MclustDA discriminant analysis}
\description{
Discriminant analysis based on Gaussian finite mixture modeling.
}
\usage{
MclustDA(data, class, G = NULL, modelNames = NULL,
modelType = c("MclustDA", "EDDA"),
prior = NULL,
control = emControl(),
initialization = NULL,
warn = mclust.options("warn"),
verbose = interactive(),
\dots)
}
\arguments{
\item{data}{
A data frame or matrix giving the training data.
}
\item{class}{
A vector giving the known class labels (either a numerical value or
a character string) for the observations in the training data.}
\item{G}{
An integer vector specifying the numbers of mixture components
(clusters) for which the BIC is to be calculated within each class.
The default is \code{G = 1:5}.\cr
A different set of mixture components for each class can be specified
by providing this argument with a list of integers for each class.
See the examples below.
}
\item{modelNames}{
A vector of character strings indicating the models to be fitted
by EM within each class (see the description in
\code{\link{mclustModelNames}}).
A different set of mixture models for each class can be specified
by providing this argument with a list of character strings.
See the examples below.
}
\item{modelType}{
A character string specifying whether the models given in
\code{modelNames} should fit a different number of mixture
components and covariance structures for each class
(\code{"MclustDA"}, the default) or should be constrained
to have a single component for each class with the same covariance
structure among classes (\code{"EDDA"}).
See the Details section and the examples below.
}
\item{prior}{
The default assumes no prior, but this argument allows specification of a
conjugate prior on the means and variances through the function
\code{\link{priorControl}}.
}
\item{control}{
A list of control parameters for EM. The defaults are set by the call
\code{emControl()}.
}
\item{initialization}{
A list containing zero or more of the following components:
\describe{
\item{\code{hcPairs}}{
A matrix of merge pairs for hierarchical clustering such as produced
by function \code{hc}. The default is to compute a hierarchical
clustering tree by applying function \code{hc} with
\code{modelName = "E"} to univariate data and
\code{modelName = "VVV"} to multivariate data or a
subset as indicated by the \code{subset} argument.
The hierarchical clustering results are used as starting values
for EM.}
\item{\code{subset}}{
A logical or numeric vector specifying a subset of the data
to be used in the initial hierarchical clustering phase.}
}
}
\item{warn}{
A logical value indicating whether or not certain warnings
(usually related to singularity) should be issued when
estimation fails.
The default is controlled by \code{\link{mclust.options}}.
}
\item{verbose}{
A logical value controlling whether a text progress bar is displayed
during the fitting procedure. By default it is \code{TRUE} if the
session is interactive, and \code{FALSE} otherwise.
}
\item{\dots }{Further arguments passed to or from other methods.}
}
\value{
An object of class \code{'MclustDA'} providing the optimal (according
to BIC) mixture model.
The details of the output components are as follows:
\item{call}{The matched call.}
\item{data}{The input data matrix.}
\item{class}{The input class labels.}
\item{type}{A character string specifying the \code{modelType} estimated.}
\item{models}{A list of \code{\link{Mclust}} objects containing information
on the fitted model for each class.}
\item{n}{The total number of observations in the data.}
\item{d}{The dimension of the data.}
% \item{BIC}{All BIC values.}
\item{bic}{Optimal BIC value.}
\item{loglik}{Log-likelihood for the selected model.}
\item{df}{Number of estimated parameters.}
}
\details{
The \code{"EDDA"} method for discriminant analysis is described in Bensmail and Celeux (1996), while the \code{"MclustDA"} method is described in Fraley and Raftery (2002).
}
\references{
Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) \emph{Model-Based Clustering, Classification, and Density Estimation Using mclust in R}. Chapman & Hall/CRC, ISBN: 978-1032234953, \url{https://mclust-org.github.io/book/}

Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317.

Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631.

Bensmail, H., and Celeux, G. (1996) Regularized Gaussian Discriminant Analysis Through Eigenvalue Decomposition. \emph{Journal of the American Statistical Association}, 91, 1743-1748.
}
\author{Luca Scrucca}
\seealso{
\code{\link{summary.MclustDA}},
\code{\link{plot.MclustDA}},
\code{\link{predict.MclustDA}},
\code{\link{classError}}
}
\examples{
# Split iris into a training half (odd-numbered rows) and a test half
# (even-numbered rows). Column 5 is used as the class label and the
# remaining columns as the data.
odd <- seq(from = 1, to = nrow(iris), by = 2)
# even holds the successor of each odd index, so this split relies on the
# number of rows being even (otherwise the last index would exceed nrow)
even <- odd + 1
X.train <- iris[odd,-5]
Class.train <- iris[odd,5]
X.test <- iris[even,-5]
Class.test <- iris[even,5]
# common EEE covariance structure (which is essentially equivalent to linear discriminant analysis)
irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA", modelNames = "EEE")
# summary of the fit, first on its own with parameters = TRUE, then with the
# test data and their known classes supplied
summary(irisMclustDA, parameters = TRUE)
summary(irisMclustDA, newdata = X.test, newclass = Class.test)
# common covariance structure selected by BIC
irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA")
summary(irisMclustDA, parameters = TRUE)
summary(irisMclustDA, newdata = X.test, newclass = Class.test)
# general covariance structure selected by BIC
irisMclustDA <- MclustDA(X.train, Class.train)
summary(irisMclustDA, parameters = TRUE)
summary(irisMclustDA, newdata = X.test, newclass = Class.test)
# All plots below use the last model fitted above (default modelType).
# Default plot, then the same restricted through dimens to dimensions 3 and 4,
# and to dimension 4 alone.
plot(irisMclustDA)
plot(irisMclustDA, dimens = 3:4)
plot(irisMclustDA, dimens = 4)
# classification plots, with and without the test data passed as newdata
plot(irisMclustDA, what = "classification")
plot(irisMclustDA, what = "classification", newdata = X.test)
plot(irisMclustDA, what = "classification", dimens = 3:4)
plot(irisMclustDA, what = "classification", newdata = X.test, dimens = 3:4)
plot(irisMclustDA, what = "classification", dimens = 4)
plot(irisMclustDA, what = "classification", dimens = 4, newdata = X.test)
# train-and-test plots; each call passes the test data as newdata
plot(irisMclustDA, what = "train&test", newdata = X.test)
plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 3:4)
plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 4)
# error plots, first without new data, then with the test data and their
# known classes
plot(irisMclustDA, what = "error")
plot(irisMclustDA, what = "error", dimens = 3:4)
plot(irisMclustDA, what = "error", dimens = 4)
plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test)
plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 3:4)
plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 4)
\donttest{
# simulated 1D data
n <- 250
# fix the random seed so the simulated sample is reproducible
set.seed(1)
# three groups of n normal draws with means -5, 0 and 5, labelled 1, 2 and 3
triModal <- c(rnorm(n,-5), rnorm(n,0), rnorm(n,5))
triClass <- c(rep(1,n), rep(2,n), rep(3,n))
# same odd/even split as above: odd positions for training, even for testing
odd <- seq(from = 1, to = length(triModal), by = 2)
even <- odd + 1
triMclustDA <- MclustDA(triModal[odd], triClass[odd])
summary(triMclustDA, parameters = TRUE)
summary(triMclustDA, newdata = triModal[even], newclass = triClass[even])
# one plot of each type for the univariate fit
plot(triMclustDA, what = "scatterplot")
plot(triMclustDA, what = "classification")
plot(triMclustDA, what = "classification", newdata = triModal[even])
plot(triMclustDA, what = "train&test", newdata = triModal[even])
plot(triMclustDA, what = "error")
plot(triMclustDA, what = "error", newdata = triModal[even], newclass = triClass[even])
# simulated 2D cross data
data(cross)
# odd rows for training, even rows for testing; column 1 of cross is used as
# the class label and the remaining columns as the data
odd <- seq(from = 1, to = nrow(cross), by = 2)
even <- odd + 1
crossMclustDA <- MclustDA(cross[odd,-1], cross[odd,1])
summary(crossMclustDA, parameters = TRUE)
summary(crossMclustDA, newdata = cross[even,-1], newclass = cross[even,1])
# one plot of each type for the bivariate fit
plot(crossMclustDA, what = "scatterplot")
plot(crossMclustDA, what = "classification")
plot(crossMclustDA, what = "classification", newdata = cross[even,-1])
plot(crossMclustDA, what = "train&test", newdata = cross[even,-1])
plot(crossMclustDA, what = "error")
plot(crossMclustDA, what = "error", newdata =cross[even,-1], newclass = cross[even,1])
}
}
\keyword{multivariate}
|