1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/find_k.R
\name{find_k}
\alias{find_k}
\alias{plot.find_k}
\title{Find the (estimated) number of clusters for a dendrogram using average silhouette width}
\usage{
find_k(dend, krange = 2:min(10, (nleaves(dend) - 1)), ...)
\method{plot}{find_k}(
x,
xlab = "Number of clusters (k)",
ylab = "Average silhouette width",
main = "Estimating the number of clusters using\\n average silhouette width",
...
)
}
\arguments{
\item{dend}{A dendrogram (or hclust) tree object}
\item{krange}{integer vector. Numbers of clusters which are to be compared
by the average silhouette width criterion.
Note: average silhouette width and Calinski-Harabasz cannot be used to
estimate a single cluster (nc = 1). If 1 is included, a Duda-Hart test is
applied and 1 is estimated if this test is not significant.}
\item{...}{passed to \link[fpc]{pamk} (the current defaults criterion="asw" and usepam=TRUE cannot be changed).}
\item{x}{An object of class "find_k" (has its own S3 plot method).}
\item{xlab, ylab, main}{parameters passed to plot.}
}
\value{
A \link[fpc]{pamk} output. This is a list with the following components:
\enumerate{
\item pamobject - the output of the optimal run of the \link[cluster]{pam} function.
\item nc - the optimal number of clusters.
\item crit - vector of criterion values for numbers of clusters. \code{crit[1]} is the p-value of the Duda-Hart test if 1 is in krange and diss=FALSE.
\item k - a copy of nc (to make it easier to extract, since k is often used in other functions).
}
}
\description{
This function estimates the number of clusters based on the maximal average \link[cluster]{silhouette} width
derived from running \link[cluster]{pam} on the \link[stats]{cophenetic} distance matrix of
the \link[stats]{dendrogram}. The output is based on the \link[fpc]{pamk} output.
}
\examples{
dend <- iris[, -5] \%>\%
dist() \%>\%
hclust() \%>\%
as.dendrogram()
dend_k <- find_k(dend)
plot(dend_k)
plot(color_branches(dend, k = dend_k$nc))
library(cluster)
sil <- silhouette(dend_k$pamobject)
plot(sil)
dend <- USArrests \%>\%
dist() \%>\%
hclust(method = "ave") \%>\%
as.dendrogram()
dend_k <- find_k(dend)
plot(dend_k)
plot(color_branches(dend, k = dend_k$nc))
}
\seealso{
\link[fpc]{pamk}, \link[cluster]{pam}, \link[cluster]{silhouette}.
}
|