File: cluster_performance.R

package info (click to toggle)
r-cran-parameters 0.24.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,852 kB
  • sloc: sh: 16; makefile: 2
file content (92 lines) | stat: -rw-r--r-- 2,666 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#' Performance of clustering models
#'
#' Compute performance indices for clustering solutions.
#'
#' @inheritParams model_parameters.hclust
#'
#' @examples
#' # kmeans
#' model <- kmeans(iris[1:4], 3)
#' cluster_performance(model)
#'
#' # hclust
#' data <- iris[1:4]
#' model <- hclust(dist(data))
#' clusters <- cutree(model, 3)
#' cluster_performance(model, data, clusters)
#'
#' # Retrieve performance from parameters
#' params <- model_parameters(kmeans(iris[1:4], 3))
#' cluster_performance(params)
#' @export
cluster_performance <- function(model, ...) {
  UseMethod("cluster_performance")
}


#' @export
cluster_performance.kmeans <- function(model, ...) {
  out <- as.data.frame(model[c("totss", "betweenss", "tot.withinss")])
  colnames(out) <- c("Sum_Squares_Total", "Sum_Squares_Between", "Sum_Squares_Within")

  out$R2 <- out$Sum_Squares_Between / out$Sum_Squares_Total

  row.names(out) <- NULL
  class(out) <- c("performance_model", class(out))

  out
}


#' @rdname cluster_performance
#' @export
cluster_performance.hclust <- function(model, data, clusters, ...) {
  if (is.null(data)) {
    insight::format_error(
      "This function requires the data used to compute the clustering to be provided via `data` as it is not accessible from the clustering object itself."
    )
  }
  if (is.null(clusters)) {
    insight::format_error(
      "This function requires a vector of clusters assignments of same length as data to be passed, as it is not contained in the clustering object itself."
    )
  }

  params <- model_parameters(model, data = data, clusters = clusters, ...)

  cluster_performance(params)
}


#' @export
cluster_performance.dbscan <- function(model, data, ...) {
  if (is.null(data)) {
    insight::format_error(
      "This function requires the data used to compute the clustering to be provided via `data` as it is not accessible from the clustering object itself."
    )
  }

  params <- model_parameters(model, data = data, ...)

  cluster_performance(params)
}


# Base --------------------------------------------------------------------


#' @export
cluster_performance.parameters_clusters <- function(model, ...) {
  valid <- model$Cluster != 0 & model$Cluster != "0" # Valid clusters
  out <- data.frame(
    Sum_Squares_Total = attributes(model)$Sum_Squares_Total,
    Sum_Squares_Between = attributes(model)$Sum_Squares_Between,
    Sum_Squares_Within = sum(model$Sum_Squares[valid], na.rm = TRUE)
  )

  out$R2 <- out$Sum_Squares_Between / out$Sum_Squares_Total

  class(out) <- c("performance_model", class(out))

  out
}