File: color_branches.Rd

package info (click to toggle)
r-cran-dendextend 1.14.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 2,888 kB
  • sloc: sh: 13; makefile: 2
file content (244 lines) | stat: -rw-r--r-- 7,626 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/color_branches.R
\name{color_branches}
\alias{color_branches}
\alias{colour_branches}
\alias{branches_color}
\title{Color tree's branches according to sub-clusters}
\source{
This function is a derived work from the \code{\link[dendroextras]{color_clusters}}
function, with some ideas from the \code{\link[dendroextras]{slice}} function -
both are from the {\pkg{dendroextras}} package by Gregory Jefferis.

It extends it by using \link[dendextend]{cutree.dendrogram} - allowing
the function to work for trees that hclust can not handle
(unbranched and non-ultrametric trees).
Also, it allows REPEATED cluster color assignments to branches of
the same tree, something which the original function was not able to handle.
}
\usage{
color_branches(
  dend,
  k = NULL,
  h = NULL,
  col,
  groupLabels = NULL,
  clusters,
  warn = dendextend_options("warn"),
  ...
)
}
\arguments{
\item{dend}{A \code{dendrogram} or \code{hclust} tree object}

\item{k}{number of groups (passed to \code{\link[dendextend]{cutree}})}

\item{h}{height at which to cut tree (passed to \code{\link[dendextend]{cutree}})}

\item{col}{Function or vector of colors. By default it tries to use
\link[colorspace]{rainbow_hcl} from the \code{colorspace} package
(with parameters c=90 and l=50). If \code{colorspace} is not available,
it will fall back on the \link{rainbow} function.}

\item{groupLabels}{If TRUE add numeric group label - see Details for options}

\item{clusters}{an integer vector of clusters. This is passed to \link{branches_attr_by_clusters}.
This HAS to be of the same length as the number of leaves.
Items that belong to no cluster should get the value 0.
The vector should be of the same order as that of the labels in the dendrogram.
If you create the clusters from something like \link{cutree} you would first
need to use \link{order.dendrogram} on it, before using it in the function.}

\item{warn}{logical (default from dendextend_options("warn") is FALSE).
Sets whether warnings are to be issued. It is safer to keep this at TRUE,
but to keep the noise down, the default is FALSE.}

\item{...}{ignored.}
}
\value{
a tree object of class \link{dendrogram}.
}
\description{
This function is for dendrogram and hclust objects.
This function colors both the terminal leaves of a dend's cluster and the edges
leading to those leaves. The edgePar attribute of nodes will be augmented by
a new list item col.
The groups will be defined by a call to \code{\link[dendextend]{cutree}}
using the k or h parameters.

If col is a color vector with a different length than the number of clusters
(k) - then a recycled color vector will be used.
}
\details{
If \code{groupLabels=TRUE} then numeric group labels will be added
  to each cluster. If a vector is supplied then these entries will be used as
  the group labels. If a function is supplied then it will be passed a
  numeric vector of groups (e.g. 1:5) and must return the formatted group
  labels.

If the \link{labels} of the dendrogram are NOT character (but, for example,
integers) - they are coerced into character. This step is essential for the
proper operation of the function. A dendrogram's labels might happen to be
integers if they are based on an \link{hclust} performed on a \link{dist}
of an object without \link{rownames}.
}
\examples{

\dontrun{
par(mfrow = c(1, 2))
dend <- USArrests \%>\%
  dist() \%>\%
  hclust(method = "ave") \%>\%
  as.dendrogram()
d1 <- color_branches(dend, k = 5, col = c(3, 1, 1, 4, 1))
plot(d1) # selective coloring of branches :)
d2 <- color_branches(d1, 5)
plot(d2)

par(mfrow = c(1, 2))
d1 <- color_branches(dend, 5, col = c(3, 1, 1, 4, 1), groupLabels = TRUE)
plot(d1) # selective coloring of branches :)
d2 <- color_branches(d1, 5, groupLabels = TRUE)
plot(d2)

par(mfrow = c(1, 3))
d5 <- color_branches(dend, 5)
plot(d5)
d5g <- color_branches(dend, 5, groupLabels = TRUE)
plot(d5g)
d5gr <- color_branches(dend, 5, groupLabels = as.roman)
plot(d5gr)

par(mfrow = c(1, 1))

# messy - but interesting:
dend_override <- color_branches(dend, 2, groupLabels = as.roman)
dend_override <- color_branches(dend_override, 4, groupLabels = as.roman)
dend_override <- color_branches(dend_override, 7, groupLabels = as.roman)
plot(dend_override)

d5 <- color_branches(dend = dend[[1]], k = 5)


library(dendextend)
data(iris, envir = environment())
d_iris <- dist(iris[, -5])
hc_iris <- hclust(d_iris)
dend_iris <- as.dendrogram(hc_iris)
dend_iris <- color_branches(dend_iris, k = 3)

library(colorspace)
labels_colors(dend_iris) <-
  rainbow_hcl(3)[sort_levels_values(
    as.numeric(iris[, 5])[order.dendrogram(dend_iris)]
  )]

plot(dend_iris,
  main = "Clustered Iris dataset",
  sub = "labels are colored based on the true cluster"
)



# cutree(dend_iris,k=3, order_clusters_as_data=FALSE,
#  try_cutree_hclust=FALSE)
# cutree(dend_iris,k=3, order_clusters_as_data=FALSE)

library(colorspace)

data(iris, envir = environment())
d_iris <- dist(iris[, -5])
hc_iris <- hclust(d_iris)
labels(hc_iris) # no labels, because "iris" has no row names
dend_iris <- as.dendrogram(hc_iris)
is.integer(labels(dend_iris)) # this could cause problems...

iris_species <- rev(levels(iris[, 5]))
dend_iris <- color_branches(dend_iris, k = 3, groupLabels = iris_species)
is.character(labels(dend_iris)) # labels are no longer "integer"

# have the labels match the real classification of the flowers:
labels_colors(dend_iris) <-
  rainbow_hcl(3)[sort_levels_values(
    as.numeric(iris[, 5])[order.dendrogram(dend_iris)]
  )]

# We'll add the flower type
labels(dend_iris) <- paste(as.character(iris[, 5])[order.dendrogram(dend_iris)],
  "(", labels(dend_iris), ")",
  sep = ""
)

dend_iris <- hang.dendrogram(dend_iris, hang_height = 0.1)

# reduce the size of the labels:
dend_iris <- assign_values_to_leaves_nodePar(dend_iris, 0.5, "lab.cex")

par(mar = c(3, 3, 3, 7))
plot(dend_iris,
  main = "Clustered Iris dataset
     (the labels give the true flower species)",
  horiz = TRUE, nodePar = list(cex = .007)
)
legend("topleft", legend = iris_species, fill = rainbow_hcl(3))
a <- dend_iris[[1]]
dend_iris1 <- color_branches(a, k = 3)
plot(dend_iris1)

# str(dendrapply(d2, unclass))
# unclass(d1)

c(1:5) \%>\% # take some data
  dist() \%>\% # calculate a distance matrix,
  # on it compute hierarchical clustering using the "single" method,
  hclust(method = "single") \%>\%
  as.dendrogram() \%>\%
  color_branches(k = 3) \%>\%
  plot() # nice, returns the tree as is...


# Example of the "clusters" parameter
par(mfrow = c(1, 2))
dend <- c(1:5) \%>\%
  dist() \%>\%
  hclust() \%>\%
  as.dendrogram()
dend \%>\%
  color_branches(k = 3) \%>\%
  plot()
dend \%>\%
  color_branches(clusters = c(1, 1, 2, 2, 3)) \%>\%
  plot()


# another example, based on the question here:
# https://stackoverflow.com/q/45432271/256662


library(cluster)
set.seed(999)
iris2 <- iris[sample(x = 1:150, size = 50, replace = F), ]
clust <- diana(iris2)
dend <- as.dendrogram(clust)

temp_col <- c("red", "blue", "green")[as.numeric(iris2$Species)]
temp_col <- temp_col[order.dendrogram(dend)]
temp_col <- factor(temp_col, unique(temp_col))

library(dendextend)
dend \%>\%
  color_branches(clusters = as.numeric(temp_col), col = levels(temp_col)) \%>\%
  set("labels_colors", as.character(temp_col)) \%>\%
  plot()
}

}
\seealso{
\code{\link[dendextend]{cutree}},\code{\link{dendrogram}},
\code{\link{hclust}}, \code{\link{labels_colors}},
\code{\link{branches_attr_by_clusters}}, \link{get_leaves_branches_col},
\link{color_labels}
}
\author{
Tal Galili, extensively based on code by Gregory Jefferis
}