1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
\name{apclusterK}
\docType{methods}
\alias{apclusterK}
\alias{apclusterK-methods}
\alias{apclusterK,matrix,missing-method}
\alias{apclusterK,Matrix,missing-method}
\alias{apclusterK,dgTMatrix,missing-method}
\alias{apclusterK,sparseMatrix,missing-method}
\alias{apclusterK,character,ANY-method}
\alias{apclusterK,function,ANY-method}
\title{Affinity Propagation for Pre-defined Number of Clusters}
\description{
Runs affinity propagation clustering for a given similarity matrix,
adjusting input preferences iteratively in order to achieve a desired
number of clusters.
}
\usage{
\S4method{apclusterK}{matrix,missing}(s, x, K, prc=10, bimaxit=20, exact=FALSE,
maxits=1000, convits=100, lam=0.9, includeSim=FALSE, details=FALSE,
nonoise=FALSE, seed=NA, verbose=TRUE)
\S4method{apclusterK}{Matrix,missing}(s, x, K, ...)
\S4method{apclusterK}{dgTMatrix,missing}(s, x, K, prc=10, bimaxit=20,
exact=FALSE, maxits=1000, convits=100, lam=0.9, includeSim=FALSE,
details=FALSE, nonoise=FALSE, seed=NA, verbose=TRUE)
\S4method{apclusterK}{sparseMatrix,missing}(s, x, K, ...)
\S4method{apclusterK}{function,ANY}(s, x, K, prc=10, bimaxit=20, exact=FALSE,
maxits=1000, convits=100, lam=0.9, includeSim=TRUE, details=FALSE,
nonoise=FALSE, seed=NA, verbose=TRUE, ...)
\S4method{apclusterK}{character,ANY}(s, x, K, prc=10, bimaxit=20, exact=FALSE,
maxits=1000, convits=100, lam=0.9, includeSim=TRUE, details=FALSE,
nonoise=FALSE, seed=NA, verbose=TRUE, ...)
}
\arguments{
\item{s}{an \eqn{l\times l}{lxl} similarity matrix in sparse or dense
format or a similarity
function either specified as the name of a package-provided
similarity function as character string or a user provided
function object.}
\item{x}{input data to be clustered; if \code{x} is a matrix or data
frame, rows are interpreted as samples and columns are
interpreted as features; apart from matrices or data frames,
\code{x} may be any other structured data type that
contains multiple data items - provided that an appropriate
\code{\link[base:length]{length}} function is available that
returns the number of items}
\item{K}{desired number of clusters}
\item{prc}{the algorithm stops if the number of clusters does not
deviate by more than \code{prc} percent from the desired value \code{K};
set to 0 if you want to have exactly \code{K} clusters}
\item{bimaxit}{maximum number of bisection steps to perform; note that
no warning is issued if the number of clusters is still not in
the desired range}
\item{exact}{flag indicating whether or not to compute the initial
preference range exactly (see \code{\link{preferenceRange}})}
\item{maxits}{maximal number of iterations that \code{\link{apcluster}}
should execute}
\item{convits}{\code{\link{apcluster}} terminates if the exemplars have not
changed for \code{convits} iterations}
\item{lam}{damping factor for \code{\link{apcluster}}; should be a value in
the range [0.5, 1);
higher values correspond to heavy damping which may be
needed if oscillations occur}
\item{includeSim}{if \code{TRUE}, the similarity matrix (either computed
internally or passed via the \code{s} argument) is stored to the
slot \code{sim} of the returned
\code{\linkS4class{APResult}} object. The default is \code{FALSE}
if \code{apclusterK} has been called for a similarity matrix,
otherwise the default is \code{TRUE}.}
\item{details}{if \code{TRUE}, more detailed information about the
algorithm's progress is stored in the output object
(see \code{\linkS4class{APResult}})}
\item{nonoise}{\code{\link{apcluster}} adds a small amount of noise to
\code{s} to prevent degenerate cases; if \code{TRUE},
this is disabled}
\item{seed}{for reproducibility, the seed of the random number
generator can be set to a fixed value; if \code{NA},
the seed remains unchanged}
\item{verbose}{flag indicating whether status information should be
displayed during bisection}
\item{...}{for the methods with signatures \code{character,ANY} and
\code{function,ANY}, all other arguments are passed to the selected
similarity function as they are; for the methods with signatures
\code{Matrix,missing} and \code{sparseMatrix,missing}, further
arguments are passed on to the \code{apclusterK} methods with
signatures \code{matrix,missing} and \code{dgTMatrix,missing},
respectively.}
}
\details{
\code{apclusterK} first runs \code{\link{preferenceRange}} to determine
the range of meaningful choices of the input preference \code{p}. Then
it decreases \code{p} exponentially for a few iterations to obtain a
good initial guess for \code{p}. If the number of clusters is still
too far from the desired goal, bisection is applied.
When called with a similarity matrix as input, the function performs
the procedure described above. When called with the name of a package-provided
similarity function or a user-provided similarity function object and
input data, the function first computes the similarity matrix before
running \code{apclusterK} on this similarity matrix. The similarity
matrix is returned for later use as part of the
\code{\linkS4class{APResult}} object if \code{includeSim} is set to
\code{TRUE} (see argument description above).
Apart from minor adaptations and optimizations, the implementation is
largely analogous to Frey and Dueck's Matlab code
(see \url{https://psi.toronto.edu/research/affinity-propagation-clustering-by-message-passing/}).
}
\value{
Upon successful completion, the function returns a
\code{\linkS4class{APResult}} object.
}
\author{Ulrich Bodenhofer and Andreas Kothmeier}
\references{\url{https://github.com/UBod/apcluster}
Frey, B. J. and Dueck, D. (2007) Clustering by passing messages
between data points. \emph{Science} \bold{315}, 972-976.
DOI: \doi{10.1126/science.1136800}.
Bodenhofer, U., Kothmeier, A., and Hochreiter, S. (2011)
APCluster: an R package for affinity propagation clustering.
\emph{Bioinformatics} \bold{27}, 2463-2464.
DOI: \doi{10.1093/bioinformatics/btr406}.
}
\seealso{\code{\link{apcluster}}, \code{\link{preferenceRange}},
\code{\linkS4class{APResult}}}
\examples{
## create three Gaussian clouds
cl1 <- cbind(rnorm(70, 0.2, 0.05), rnorm(70, 0.8, 0.06))
cl2 <- cbind(rnorm(50, 0.7, 0.08), rnorm(50, 0.3, 0.05))
cl3 <- cbind(rnorm(60, 0.8, 0.04), rnorm(60, 0.8, 0.05))
x <- rbind(cl1, cl2, cl3)
## run affinity propagation such that 3 clusters are obtained
apres <- apclusterK(negDistMat(r=2), x, K=3)
## show details of clustering results
show(apres)
## plot clustering result
plot(apres, x)
## create sparse similarity matrix
cl1 <- cbind(rnorm(20, 0.2, 0.05), rnorm(20, 0.8, 0.06))
cl2 <- cbind(rnorm(20, 0.7, 0.08), rnorm(20, 0.3, 0.05))
x <- rbind(cl1, cl2)
sim <- negDistMat(x, r=2)
ssim <- as.SparseSimilarityMatrix(sim, lower=-0.2)
## run apclusterK() on the sparse similarity matrix
apres <- apclusterK(ssim, K=2)
apres
}
\keyword{cluster}
|