File: fastcluster.R

package info (click to toggle)
r-cran-fastcluster 1.3.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 616 kB
  • sloc: cpp: 2,705; python: 854; makefile: 35; sh: 11
file content (68 lines) | stat: -rw-r--r-- 2,404 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#  fastcluster: Fast hierarchical clustering routines for R and Python
#
#  Copyright:
#    * Until package version 1.1.23: © 2011 Daniel Müllner <https://danifold.net>
#    * All changes from version 1.1.24 on: © Google Inc. <https://www.google.com>

hclust <- function(d, method="complete", members=NULL)
{
  # Hierarchical clustering, on raw input data.
  if(method == "ward") {
    message("The \"ward\" method has been renamed to \"ward.D\"; note new \"ward.D2\"")
    method <- "ward.D"
  }
  # This array must agree with the enum method_codes in fastcluster.cpp.
  METHODS <- c("single", "complete", "average", "mcquitty", "ward.D", "centroid", "median", "ward.D2")
  method <- pmatch(method, METHODS)
  if (is.na(method))
    stop("Invalid clustering method.")
  if (method == -1)
    stop("Ambiguous clustering method.")
  dendrogram <- c( .Call(fastcluster, attr(d, "Size"), method, d, members),
    list(
      labels = attr(d, "Labels")
      ,method = METHODS[method]
      ,call = match.call()
      ,dist.method = attr(d, "method")
    )
  )
  class(dendrogram) <- "hclust"
  return (dendrogram)
}

hclust.vector <- function(X, method='single', members=NULL, metric='euclidean', p=NULL)
{
  # Hierarchical clustering, on vector data.
  METHODS <- c("single", "ward", "centroid", "median")
  methodidx <- pmatch(method, METHODS)
  if (is.na(methodidx))
    stop(paste("Invalid clustering method '", method, "' for vector data.", sep=''))
  if (methodidx == -1)
    stop("Ambiguous clustering method.")

  METRICS <- c("euclidean", "maximum", "manhattan", "canberra", "binary",
               "minkowski")
  metric = pmatch(metric, METRICS)
  if (is.na(metric) || metric > 6)
    stop("Invalid metric.")
  if (metric == -1)
    stop("Ambiguous metric.")
  if (metric == 4 && getRversion() < "3.5.0")
    metric <- as.integer(7) # special metric code for backwards compatibility

  if (methodidx!=1 && metric!=1)
    stop("The Euclidean methods 'ward', 'centroid' and 'median' require the 'euclidean' metric.")

  X <- as.matrix(X)

  dendrogram <- c( .Call(fastcluster_vector, methodidx, metric, X, members, p),
    list(
      labels = dimnames(X)[[1L]]
      ,method = METHODS[methodidx]
      ,call = match.call()
      ,dist.method = METRICS[metric]
    )
  )
  class(dendrogram) <- "hclust"
  return (dendrogram)
}