File: MclustDA.Rd

package info (click to toggle)
r-cran-mclust 6.1.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,540 kB
  • sloc: fortran: 13,298; ansic: 201; sh: 4; makefile: 2
file content (232 lines) | stat: -rw-r--r-- 8,629 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
\name{MclustDA}
\alias{MclustDA}
\alias{print.MclustDA}

\title{MclustDA discriminant analysis}

\description{
Discriminant analysis based on Gaussian finite mixture modeling.
}

\usage{
MclustDA(data, class, G = NULL, modelNames = NULL, 
         modelType = c("MclustDA", "EDDA"), 
         prior = NULL, 
         control = emControl(), 
         initialization = NULL, 
         warn = mclust.options("warn"), 
         verbose = interactive(),
         \dots)
}

\arguments{
  \item{data}{
    A data frame or matrix giving the training data.
  }
	
  \item{class}{
		A vector giving the known class labels (either a numerical value or 
		a character string) for the observations in the training data.}
    
 \item{G}{
    An integer vector specifying the numbers of mixture components
    (clusters) for which the BIC is to be calculated within each class. 
    The default is \code{G = 1:5}.\cr
    A different set of mixture components for each class can be specified
    by providing this argument with a list of integers for each class. 
    See the examples below.
  }
  
  \item{modelNames}{
    A vector of character strings indicating the models to be fitted 
    by EM within each class (see the description in 
    \code{\link{mclustModelNames}}).
    A different set of mixture models for each class can be specified
    by providing this argument with a list of character strings.
    See the examples below.
  }
   
  \item{modelType}{
    A character string specifying whether the models given in
    \code{modelNames} should fit a different number of mixture 
    components and covariance structures for each class 
    (\code{"MclustDA"}, the default) or should be constrained 
    to have a single component for each class with the same covariance 
    structure among classes (\code{"EDDA"}).
    See Details section and the examples below.
  }

  \item{prior}{
    The default assumes no prior, but this argument allows specification of a 
    conjugate prior on the means and variances through the function 
    \code{\link{priorControl}}.
  }
  
  \item{control}{
    A list of control parameters for EM. The defaults are set by the call
    \code{emControl()}. 
  }
  
  \item{initialization}{
    A list containing zero or more of the following components:
  \describe{
  \item{\code{hcPairs}}{
    A matrix of merge pairs for hierarchical clustering such as produced
    by function \code{hc}. The default is to compute a hierarchical
    clustering tree by applying function \code{hc} with
    \code{modelName = "E"} to univariate data and
    \code{modelName = "VVV"} to multivariate data or a
    subset as indicated by the \code{subset} argument. 
    The hierarchical clustering results are used as starting values 
    for EM.}
  \item{\code{subset}}{
    A logical or numeric vector specifying a subset of the data
    to be used in the initial hierarchical clustering phase.}
  }
  }
  
  \item{warn}{
    A logical value indicating whether or not certain warnings
    (usually related to singularity) should be issued when
    estimation fails. 
    The default is controlled by \code{\link{mclust.options}}.
  }
  \item{verbose}{
    A logical controlling if a text progress bar is displayed during the
    fitting procedure. By default is \code{TRUE} if the session is 
    interactive, and \code{FALSE} otherwise.
  }  
  \item{\dots }{Further arguments passed to or from other methods.}
}

\value{
 An object of class \code{'MclustDA'} providing the optimal (according 
 to BIC) mixture model.
 
 The details of the output components are as follows:

  \item{call}{The matched call.} 

  \item{data}{The input data matrix.} 
  
  \item{class}{The input class labels.} 
    
  \item{type}{A character string specifying the \code{modelType} estimated.}
    
  \item{models}{A list of \code{\link{Mclust}} objects containing information
  on fitted model for each class.} 
    
  \item{n}{The total number of observations in the data.}

  \item{d}{The dimension of the data.}

  % \item{BIC}{All BIC values.}
  
  \item{bic}{Optimal BIC value.}
    
  \item{loglik}{Log-likelihood for the selected model.}
  
  \item{df}{Number of estimated parameters.}
}

\details{
The \code{"EDDA"} method for discriminant analysis is described in Bensmail and Celeux (1996), while \code{"MclustDA"} in Fraley and Raftery (2002).
}

\references{
Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) \emph{Model-Based Clustering, Classification, and Density Estimation Using mclust in R}. Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/

Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. 

Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631.

Bensmail, H., and Celeux, G. (1996) Regularized Gaussian Discriminant Analysis Through Eigenvalue Decomposition.\emph{Journal of the American Statistical Association}, 91, 1743-1748.
}

\author{Luca Scrucca}

\seealso{
  \code{\link{summary.MclustDA}}, 
  \code{\link{plot.MclustDA}}, 
  \code{\link{predict.MclustDA}}, 
  \code{\link{classError}}
}
\examples{
odd <- seq(from = 1, to = nrow(iris), by = 2)
even <- odd + 1
X.train <- iris[odd,-5]
Class.train <- iris[odd,5]
X.test <- iris[even,-5]
Class.test <- iris[even,5]

# common EEE covariance structure (which is essentially equivalent to linear discriminant analysis)
irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA", modelNames = "EEE")
summary(irisMclustDA, parameters = TRUE)
summary(irisMclustDA, newdata = X.test, newclass = Class.test)

# common covariance structure selected by BIC
irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA")
summary(irisMclustDA, parameters = TRUE)
summary(irisMclustDA, newdata = X.test, newclass = Class.test)

# general covariance structure selected by BIC
irisMclustDA <- MclustDA(X.train, Class.train)
summary(irisMclustDA, parameters = TRUE)
summary(irisMclustDA, newdata = X.test, newclass = Class.test)

plot(irisMclustDA)
plot(irisMclustDA, dimens = 3:4)
plot(irisMclustDA, dimens = 4)

plot(irisMclustDA, what = "classification")
plot(irisMclustDA, what = "classification", newdata = X.test)
plot(irisMclustDA, what = "classification", dimens = 3:4)
plot(irisMclustDA, what = "classification", newdata = X.test, dimens = 3:4)
plot(irisMclustDA, what = "classification", dimens = 4)
plot(irisMclustDA, what = "classification", dimens = 4, newdata = X.test)

plot(irisMclustDA, what = "train&test", newdata = X.test)
plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 3:4)
plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 4)

plot(irisMclustDA, what = "error")
plot(irisMclustDA, what = "error", dimens = 3:4)
plot(irisMclustDA, what = "error", dimens = 4)
plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test)
plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 3:4)
plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 4)

\donttest{
# simulated 1D data
n <- 250 
set.seed(1)
triModal <- c(rnorm(n,-5), rnorm(n,0), rnorm(n,5))
triClass <- c(rep(1,n), rep(2,n), rep(3,n))
odd <- seq(from = 1, to = length(triModal), by = 2)
even <- odd + 1
triMclustDA <- MclustDA(triModal[odd], triClass[odd])
summary(triMclustDA, parameters = TRUE)
summary(triMclustDA, newdata = triModal[even], newclass = triClass[even])
plot(triMclustDA, what = "scatterplot")
plot(triMclustDA, what = "classification")
plot(triMclustDA, what = "classification", newdata = triModal[even])
plot(triMclustDA, what = "train&test", newdata = triModal[even])
plot(triMclustDA, what = "error")
plot(triMclustDA, what = "error", newdata = triModal[even], newclass = triClass[even])

# simulated 2D cross data
data(cross)
odd <- seq(from = 1, to = nrow(cross), by = 2)
even <- odd + 1
crossMclustDA <- MclustDA(cross[odd,-1], cross[odd,1])
summary(crossMclustDA, parameters = TRUE)
summary(crossMclustDA, newdata = cross[even,-1], newclass = cross[even,1])
plot(crossMclustDA, what = "scatterplot")
plot(crossMclustDA, what = "classification")
plot(crossMclustDA, what = "classification", newdata = cross[even,-1])
plot(crossMclustDA, what = "train&test", newdata = cross[even,-1])
plot(crossMclustDA, what = "error")
plot(crossMclustDA, what = "error", newdata =cross[even,-1], newclass = cross[even,1])
}
}
\keyword{multivariate}