File: mclustICL.Rd

package info (click to toggle)
r-cran-mclust 6.1.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,540 kB
  • sloc: fortran: 13,298; ansic: 201; sh: 4; makefile: 2
file content (128 lines) | stat: -rw-r--r-- 4,386 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
\name{mclustICL}
\alias{mclustICL}
\alias{print.mclustICL}
\alias{summary.mclustICL}
\alias{print.summary.mclustICL}

\title{ICL Criterion for Model-Based Clustering}

\description{
ICL (Integrated Complete-data Likelihood) for parameterized Gaussian mixture models fitted by the EM algorithm initialized by model-based hierarchical clustering.
}

\usage{
mclustICL(data, G = NULL, modelNames = NULL, 
          initialization = list(hcPairs = NULL, 
                                subset = NULL, 
                                noise = NULL), 
          x = NULL, \dots)

\method{summary}{mclustICL}(object, G, modelNames, \dots)
}

\arguments{
  \item{data}{
    A numeric vector, matrix, or data frame of observations. Categorical
    variables are not allowed. If a matrix or data frame, rows
    correspond to observations and columns correspond to variables. 
  }
  
  \item{G}{
    An integer vector specifying the numbers of mixture components
    (clusters) for which the criteria should be calculated. 
    The default is \code{G = 1:9}. 
  }
  
  \item{modelNames}{
    A vector of character strings indicating the models to be fitted 
    in the EM phase of clustering. The help file for 
    \code{\link{mclustModelNames}} describes the available models.
    The default is:
    \describe{
        \item{\code{c("E", "V")}}{for univariate data}
        \item{\code{mclust.options("emModelNames")}}{for multivariate data (n > d)}
        \item{\code{c("EII", "VII", "EEI", "EVI", "VEI", "VVI")}}{the spherical and diagonal models for multivariate data (n <= d)}
     }
   }

 \item{initialization}{
    A list containing zero or more of the following components:
  \describe{
  \item{\code{hcPairs}}{
    A matrix of merge pairs for hierarchical clustering such as produced
    by function \code{hc}. For multivariate data, the default is to compute
    a hierarchical clustering tree by applying function \code{hc} with
    \code{modelName = "VVV"} to the data or a subset as indicated by the
    \code{subset} argument.
    The hierarchical clustering results are used to start EM.
    For univariate data, the default is to use quantiles to start EM.
  }
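  \item{\code{subset}}{
    A logical or numeric vector specifying a subset of the data
    to be used in the initial hierarchical clustering phase.
  }
  \item{\code{noise}}{
    A logical or numeric vector indicating an initial guess as to
    which observations are noise in the data. 
    See \code{\link{mclustBIC}} for details.
  }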
  }
  }
  \item{x}{
     An object of class \code{'mclustICL'}. If supplied, \code{mclustICL}
     will use the settings in \code{x} to produce another object of
     class \code{'mclustICL'}, but with \code{G} and \code{modelNames}
     as specified in the arguments. Models that have already been computed
     in \code{x} are not recomputed. All arguments to \code{mclustICL} 
     except \code{data}, \code{G} and \code{modelNames} are
     ignored and their values are set as specified in the attributes of
     \code{x}. 
     Defaults for \code{G} and \code{modelNames} are taken from \code{x}.
  }
  \item{\dots}{
    Further arguments used in the call to \code{\link{Mclust}}. 
    See also \code{\link{mclustBIC}}.
  }

  \item{object}{
    An object of class \code{'mclustICL'}, as returned by
    \code{mclustICL}, for which a summary is requested.
  }

}

\value{
Returns an object of class \code{'mclustICL'} containing the ICL criterion 
for the specified mixture models and numbers of clusters.

The corresponding \code{print} method shows the matrix of values and the top models according to the ICL criterion. The \code{summary} method shows only the top models.
}

\references{
Biernacki, C., Celeux, G., Govaert, G. (2000). 
Assessing a mixture model for clustering with the integrated completed likelihood.
\emph{IEEE Trans. Pattern Analysis and Machine Intelligence}, 22 (7), 719-725.

Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. 
}

\seealso{
  \code{\link{plot.mclustICL}}, 
  \code{\link{Mclust}}, 
  \code{\link{mclustBIC}}, 
  \code{\link{mclustBootstrapLRT}}, 
  \code{\link{bic}},
  \code{\link{icl}}
}

\examples{
data(faithful)
faithful.ICL <- mclustICL(faithful)
faithful.ICL
summary(faithful.ICL)
plot(faithful.ICL)
\donttest{
# compare with
faithful.BIC <- mclustBIC(faithful)
faithful.BIC
plot(faithful.BIC)
}
}

\keyword{cluster}