File: plotDensityClust.R

package info (click to toggle)
r-cran-densityclust 0.3.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 280 kB
  • sloc: cpp: 175; sh: 12; makefile: 2
file content (201 lines) | stat: -rw-r--r-- 6,743 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#' @name plotDensityClust
#' @title Plot densityCluster results
#' @description Generate a single panel of up to three diagnostic plots for a
#'   \code{densityClust} object.
#'
#' @param x A densityCluster object as produced by \code{\link{densityClust}}
#' @param type A character vector designating which figures to produce. Valid
#'   options include \code{"dg"} for a decision graph of \eqn{\delta} vs.
#'   \eqn{\rho}, \code{"gg"} for a gamma graph depicting the decrease of
#'   \eqn{\gamma} (= \eqn{\delta} * \eqn{\rho}) across samples, and \code{"mds"},
#'   for a Multi-Dimensional Scaling (MDS) plot of observations. Any combination
#'   of these three can be included in the vector, or to produce all plots,
#'   specify \code{type = "all"}.
#' @param n Number of observations to plot in the gamma graph.
#' @param mds A matrix of scores for observations from a Principal Components
#'   Analysis or MDS. If omitted, and a MDS plot has been requested, one will
#'   be calculated.
#' @param dim.x,dim.y The numbers of the dimensions to plot on the x and y
#'   axes of the MDS plot.
#' @param col Vector of colors for clusters.
#' @param alpha Value in \code{0:1} controlling transparency of points in the
#'   decision graph and MDS plot.
#'
#' @return A panel of the figures specified in \code{type} are produced.
#'   If designated, clusters are color-coded and labelled. If present in
#'   \code{x}, the rho and delta thresholds are designated in the
#'   decision graph by a set of solid black lines.
#'
#' @author Eric Archer \email{eric.archer@@noaa.gov}
#'
#' @examples
#' data(iris)
#' data.dist <- dist(iris[, 1:4])
#' pca <- princomp(iris[, 1:4])
#'
#' # Run initial density clustering
#' dens.clust <- densityClust(data.dist)
#
#' op <- par(ask = TRUE)
#'
#' # Show the decision graph
#' plotDensityClust(dens.clust, type = "dg")
#'
#' # Show the decision graph and the gamma graph
#' plotDensityClust(dens.clust, type = c("dg", "gg"))
#'
#' # Cluster based on rho and delta
#' new.clust <- findClusters(dens.clust, rho = 4, delta = 2)
#'
#' # Show all graphs with clustering
#' plotDensityClust(new.clust, mds = pca$scores)
#'
#' par(op)
#'
#' @importFrom RColorBrewer brewer.pal
#' @importFrom ggplot2 ggplot aes_string geom_text geom_point geom_segment labs
#'   theme_bw theme scale_color_manual geom_line geom_label
#' @importFrom ggrepel geom_label_repel
#' @importFrom gridExtra grid.arrange
#' @importFrom grDevices rainbow
#' @export
#'
plotDensityClust <- function(x, type = "all", n = 20,
                             mds = NULL, dim.x = 1, dim.y = 2,
                             col = NULL, alpha = 0.8) {

  type <- tolower(type)
  if(any(pmatch(type, "all", nomatch = 0))) type <- c("dg", "gg", "mds")

  df <- data.frame(
    rho = x$rho, delta = x$delta, gamma = x$rho * x$delta,
    peaks = FALSE, cluster = factor(x$clusters), halo = x$halo
  )
  df$peaks[x$peaks] <- TRUE

  if(is.null(col)) {
    num.cols <- max(nlevels(df$cluster), 3)
    col <- if(num.cols <= 8) {
      brewer.pal(num.cols, "Set2")
    } else if(num.cols <= 12) {
      brewer.pal(num.cols, "Set3")
    } else rainbow(num.cols + 1)[1:num.cols]
  }

  plots <- list(dg = NULL, gg = NULL, mds = NULL)

  # Plot decision graph (dg)
  if(any(pmatch(type, "dg", nomatch = 0))) {
    plots$dg <- ggplot(df, aes_string(x = "rho", y = "delta"))
    if(!any(is.na(x$threshold))) {
      rho <- x$threshold["rho"]
      delta <- x$threshold["delta"]
      thresh.df <- data.frame(
        x = c(rho, rho),
        y = c(delta, delta),
        xend = c(rho, Inf),
        yend = c(Inf, delta)
      )
      plots$dg <- plots$dg +
        geom_segment(
          aes_string(x = "x", xend = "xend", y = "y", yend = "yend"),
          data = thresh.df, inherit.aes = F,
          lineend = "butt"
        )
    }
    if(any(df$peaks)) {
      plots$dg <- plots$dg +
        geom_label(
          aes_string(label = "cluster", color = "cluster"),
          data = df[df$peaks, ],
          fontface = "bold", alpha = alpha
        ) +
        scale_color_manual(values = col)
    }
    plots$dg <- plots$dg  +
      geom_point(
        data = df[!df$peaks, ],
        size = 3, color = "gray50", alpha = alpha
      ) +
      labs(x = expression(rho), y = expression(delta), color = "Cluster") +
      theme(legend.position = "none")
  }

  # Plot gamma graph (gg)
  if(any(pmatch(type, "gg", nomatch = 0))) {
    gg.df <- df[order(df$gamma, decreasing = TRUE), ]
    gg.df <- gg.df[1:n, , drop = FALSE]
    gg.df$Sample <- 1:nrow(gg.df)

    plots$gg <- ggplot(gg.df, aes_string(x = "Sample", y = "gamma")) + geom_line()
    if(any(gg.df$peaks)) {
      plots$gg <- plots$gg +
        geom_label(
          aes_string(label = "cluster", color = "cluster"),
          data = gg.df[gg.df$peaks, , drop = FALSE],
          fontface = "bold", alpha = alpha
        ) +
        scale_color_manual(values = col)
    }
    plots$gg <- plots$gg +
      geom_point(
        data = gg.df[!gg.df$peaks, , drop = FALSE],
        size = 3, color = "gray50"
      ) +
      labs(y = expression(gamma), color = "Cluster") +
      theme(legend.position = "none")
  }

  # Plot MDS (mds)
  if(any(pmatch(type, "mds", nomatch = 0))) {
    if(is.null(mds)) mds <- cmdscale(x$distance, k = max(dim.x, dim.y))
    df$x <- mds[, dim.x]
    df$y <- mds[, dim.y]

    plots$mds <- ggplot()
    plots$mds <- if(all(is.na(df$cluster))) {
      plots$mds +
        geom_point(
          aes_string(x = "x", y = "y"),
          data = df,
          size = 3, color = "gray50", alpha = alpha
        )
    } else {
      plots$mds +
        geom_point(
          aes_string(x = "x", y = "y", color = "cluster"),
          data = df[df$halo, , drop = FALSE],
          shape = 21, size = 3
        ) +
        geom_point(
          aes_string(x = "x", y = "y", color = "cluster"),
          data = df[!df$halo, , drop = FALSE],
          size = 3, alpha = alpha
        ) +
        geom_label_repel(
          aes_string(x = "x", y = "y", label = "cluster", color = "cluster"),
          data = df[df$peaks, , drop = FALSE],
          size = 6, fontface = "bold", alpha = alpha
        ) +
        scale_color_manual(values = col, na.value = "gray50")
    }
    plots$mds <- plots$mds +
      labs(x = paste("Dimension", dim.x), y = paste("Dimension", dim.y)) +
      theme(legend.position = "none")
  }

  has.plot <- !sapply(plots, is.null)
  switch(
    sum(has.plot),
    print(plots[[which(has.plot)]]),
    {
      plots <- plots[has.plot]
      if("mds" %in% names(plots)) plots$nrow <- 2 else plots$ncol <-2
      do.call(grid.arrange, plots)
    },
    {
      plots$layout_matrix <- matrix(c(1, 3, 2, 3), nrow = 2)
      do.call(grid.arrange, plots)
    }
  )
}