1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
|
\name{prabclust}
\alias{prabclust}
\alias{print.prabclust}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Clustering for biotic elements or for species delimitation
(mixture method)}
\description{
Clusters a presence-absence matrix object (for clustering
ranges/finding biotic elements, Hennig and Hausdorf, 2004) or
an object of genetic information (for species delimitation, Hausdorf
and Hennig, 2010)
by calculating an MDS from
the distances, and applying maximum likelihood Gaussian mixtures clustering
with "noise" (package \code{mclust}) to the MDS points. The solution
is plotted. A standard execution (using the default distance of
\code{prabinit}) will be \cr
\code{prabmatrix <- prabinit(file="path/prabmatrixfile",
neighborhood="path/neighborhoodfile")}\cr
\code{clust <- prabclust(prabmatrix)}\cr
\code{print(clust)} \cr
Examples for species delimitation are given below in the examples section.
\bold{Note:} Data formats are described
on the \code{\link{prabinit}} and \code{\link{alleleinit}}
help pages. You may also consider the example datasets
\code{kykladspecreg.dat}, \code{nb.dat},
\code{Heterotrigona_indoFO.txt} or \code{MartinezOrtega04AFLP.dat}.
\cr
\bold{Note:} \code{prabclust} calls the function
\code{\link[mclust]{mclustBIC}} in package mclust. An alternative is the use of \code{\link{hprabclust}}.
}
\usage{
prabclust(prabobj, mdsmethod = "classical", mdsdim = 4, nnk =
ceiling(prabobj$n.species/40), nclus = 0:9, modelid = "all", permutations=0)
\method{print}{prabclust}(x, bic=FALSE, ...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{prabobj}{object of class \code{prab} as
generated by \code{prabinit}. Presence-absence data to be analyzed.
(This can be geographical information for range clustering
or dominant marker data for species delimitation.)
Can also be an object of class \code{alleleobject} as generated by
\code{alleleinit}.
}
\item{mdsmethod}{\code{"classical"}, \code{"kruskal"}, or
\code{"sammon"}. The MDS method
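to transform the distances to data points. \code{"classical"} indicates
metric MDS by function \code{cmdscale}, \code{"kruskal"} is
non-metric MDS by function \code{isoMDS}, and \code{"sammon"} is
Sammon's non-linear mapping by function \code{sammon}.}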
\item{mdsdim}{integer. Dimension of the MDS points. For
\code{mdsmethod=="kruskal"}, \code{\link{stressvals}} can be used to
see how the stress depends on \code{mdsdim} in order to choose
\code{mdsdim} to get a small stress (smaller than 5\%, say).}
\item{nnk}{integer. Number of nearest neighbors to determine the
initial noise estimation by \code{NNclean}. \code{nnk=0} fits the
model without a noise component.}
\item{nclus}{vector of integers. Numbers of clusters to perform the
mixture estimation.}
\item{modelid}{string. Model name for \code{mclustBIC} (see the
corresponding help page; all models or combinations of models
mentioned there are possible). \code{modelid="all"} compares all possible
models. Additionally, \code{"noVVV"} is possible, which
fits all models except \code{"VVV"}.}
\item{permutations}{integer. It has been found occasionally that
depending on the order of observations the algorithms \code{isoMDS}
and \code{mclustBIC} converge to different solutions. This is
because these methods require an ordering of the distances, which,
if equal distance values are involved, may depend on the order.
\code{prabclust} uses a standard ordering which should give a
reproducible solution in these cases as well. However, if
\code{permutations>0}, which gives a number of random permutations
of the observations, the algorithm is carried out for every
permutation and the best solution (in terms of the BIC, based on the
lowest stress MDS configuration) is returned (for many datasets
this won't change anything except increasing the computing time).}
\item{x}{object of class \code{prabclust}. Output of
\code{prabclust}.}
\item{bic}{logical. If \code{TRUE}, information about the BIC
criterion to choose the model is displayed.}
\item{...}{necessary for print method.}
}
\details{
Note that if \code{mdsmethod!="classical"}, zero distances between
non-identical objects are replaced by the smallest nonzero distance
divided by 10 to prevent the MDS methods from producing an error.
}
\value{
\code{print.prabclust} does not produce output.
\code{prabclust} generates an object of class \code{prabclust}. This is a
list with components
\item{clustering}{vector of integers indicating the cluster memberships of
the species. Noise can be recognized by output component \code{symbols}.}
\item{clustsummary}{output object of \code{summary.mclustBIC}. A list
giving the optimal (according to BIC) parameters,
conditional probabilities `z', and loglikelihood, together with
the associated classification and its uncertainty. Note that the
numbering of clusters may differ from \code{clustering}, see
\code{csreorder}.}
\item{bicsummary}{output object of \code{mclustBIC}. Bayesian Information
Criterion for the specified mixture models and numbers of clusters. }
\item{points}{numerical matrix. MDS configuration.}
\item{nnk}{see above.}
\item{mdsdim}{see above.}
\item{mdsmethod}{see above.}
\item{symbols}{vector of characters, similar to \code{clustering}, but
indicating estimated noise and points belonging to
one-point-components (which should be interpreted as some kind of
noise as well) by \code{"N"}. }
\item{permchange}{logical. If \code{TRUE}, \code{permutations>0} has
been used and the best solution is different from the one obtained
by the standard ordering. (This is just for information and has no
further operational consequences.)}
}
\references{
Fraley, C. and Raftery, A. E. (1998) How many clusters? Which
clustering method? - Answers via Model-Based Cluster Analysis.
\emph{Computer Journal} 41, 578-588.
Hausdorf, B. and Hennig, C. (2010) Species Delimitation Using Dominant
and Codominant Multilocus Markers. \emph{Systematic Biology}, 59, 491-503.
Hennig, C. and Hausdorf, B. (2004) Distance-based parametric bootstrap
tests for clustering of species ranges. \emph{Computational Statistics
and
Data Analysis} 45, 875-896.
\url{http://stat.ethz.ch/Research-Reports/110.html}.
}
\note{
Note that we used \code{mdsmethod="kruskal"} in our publications, but
\code{mdsmethod="classical"} is now the default, because of
occasional numerical instabilities of the \code{isoMDS}-implementation
for Jaccard, Kulczynski or geco distance matrices.
Sometimes, \code{prabclust} produces an error because \code{mclustBIC}
cannot handle all models properly. In this case we recommend changing
the \code{modelid} parameter. \code{"noVVV"} and \code{"VVV"} are
reasonable alternative choices (one of these is expected to reproduce
the error, but the other one might work).
}
\author{Christian Hennig
\email{christian.hennig@unibo.it}
\url{https://www.unibo.it/sitoweb/christian.hennig/en}}
\seealso{
\code{\link[mclust]{mclustBIC}}, \code{\link[mclust]{summary.mclustBIC}},
\code{\link{NNclean}}, \code{\link{cmdscale}},
\code{\link{isoMDS}}, \code{\link{sammon}},
\code{\link{prabinit}}, \code{\link{hprabclust}},
\code{\link{alleleinit}}, \code{\link{stressvals}}.
}
\examples{
\donttest{
# Biotic element/range clustering:
data(kykladspecreg)
data(nb)
set.seed(1234)
x <- prabinit(prabmatrix=kykladspecreg, neighborhood=nb)
# If you want to use your own ASCII data files, use
# x <- prabinit(file="path/prabmatrixfile",
# neighborhood="path/neighborhoodfile")
print(prabclust(x))
# Here is an example for species delimitation with codominant markers;
# only 50 individuals were used in order to have a fast example.
data(tetragonula)
ta <- alleleconvert(strmatrix=tetragonula[1:50,])
tai <- alleleinit(allelematrix=ta)
print(prabclust(tai))
# Here is an example for species delimitation with dominant markers;
# only 50 individuals were used in order to have a fast example.
# You may want to use stressvals to choose mdsdim.
data(veronica)
vei <- prabinit(prabmatrix=veronica[1:50,],distance="jaccard")
print(prabclust(vei,mdsmethod="kruskal",mdsdim=3))
}
}
\keyword{cluster}% at least one, from doc/KEYWORDS
\keyword{spatial}% __ONLY ONE__ keyword per line
|