File: dendro.r

package info (click to toggle)
simka 1.5.3-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,200 kB
  • sloc: cpp: 5,321; python: 672; sh: 386; makefile: 26
file content (87 lines) | stat: -rwxr-xr-x 2,548 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#Author: Gaetan Benoit
#Contact: gaetan.benoit@inria.fr


# Command-line arguments, as consumed by this script:
#   args[1]  distance matrix file (";"-separated, header row, row names in column 1)
#   args[2]  output path without extension
#   args[3]  figure width, args[4] figure height (inches)
#   args[5]  output format: "png", anything else produces a PDF
#   args[6]  optional metadata table, args[7] optional metadata column used for colouring
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 5) {
	# Fail early with a usage message instead of a cryptic error further down.
	stop("expected at least 5 arguments: <distance matrix> <output prefix> <width> <height> <format> [<metadata file> <metadata variable>]", call. = FALSE)
}
distanceMatrixFilename <- args[1]

# Name shown in the plot title: the file name up to its first ".",
# with every occurrence of "mat_" removed.
distance_name <- basename(distanceMatrixFilename)
distance_name <- unlist(strsplit(distance_name, "[.]"))[1]
distance_name <- gsub("mat_", "", distance_name)


distanceMatrix <- as.matrix(read.table(file = distanceMatrixFilename, sep = ";", header = TRUE, row.names = 1))
# Symmetrize: overwrite the lower triangle with the mirrored upper triangle.
distanceMatrix[lower.tri(distanceMatrix)] <- t(distanceMatrix)[lower.tri(distanceMatrix)]


# Open the graphics device the dendrogram will be drawn on. The output file is
# args[2] plus an extension picked from the requested format.
width <- as.numeric(args[3])
height <- as.numeric(args[4])
format <- args[5]

if (format != "png") {
	# Every format other than "png" is written as a PDF.
	pdf(file = paste0(args[2], ".pdf"), width = width, height = height)
} else {
	# PNG dimensions are given in inches and rendered at 72 dpi.
	png(filename = paste0(args[2], ".png"), width = width, height = height, units = "in", res = 72)
}


# Optional metadata colouring. When exactly 7 arguments are given, args[6] is a
# ";"-separated metadata table (row names = dataset ids) and args[7] names the
# column whose values define the colour groups. This section produces:
#   use_metadata       TRUE when colouring is active
#   metadata_variable  name of the metadata column (used as legend title)
#   colors             metadata value of each dataset, in distance-matrix row order
#   colors_numeric     colour code of each dataset (factor level + 1)
use_metadata <- FALSE
if (length(args) == 7) {
	suppressPackageStartupMessages(library(dendextend))

	use_metadata <- TRUE
	metadata_table <- as.matrix(read.table(file = args[6], sep = ";", header = TRUE, row.names = 1))
	metadata_variable <- args[7]
	if (!(metadata_variable %in% colnames(metadata_table))) {
		stop("metadata variable '", metadata_variable, "' is not a column of ", args[6], call. = FALSE)
	}
	variables <- metadata_table[, metadata_variable]
	metadata_ids <- rownames(metadata_table)

	# Trace of the metadata value attached to each dataset id.
	for (i in seq_len(nrow(metadata_table))) {
		print(paste0(metadata_ids[i], " ", variables[[i]]))
	}

	# Align metadata values with the rows of the distance matrix. match() yields
	# NA for datasets absent from the metadata table, so the result always has
	# one entry per dataset and the colours cannot shift onto the wrong dataset.
	dataset_ids <- rownames(distanceMatrix)
	metadata_row <- match(dataset_ids, metadata_ids)
	colors <- unname(variables[metadata_row])

	unmatched <- is.na(metadata_row)
	if (any(unmatched)) {
		warning("no metadata found for dataset(s): ", paste(dataset_ids[unmatched], collapse = ", "),
			"; they are grouped under \"NA\"", call. = FALSE)
		colors[unmatched] <- "NA"
	}

	# Factor levels start at 1; the +1 shifts the colour codes to start at 2.
	colors_numeric <- as.numeric(as.factor(colors)) + 1
}




# Cluster the datasets (average linkage) and draw the dendrogram on the device
# opened earlier in the script.
# Distances are multiplied by 100 before clustering
# (presumably to display heights as percentages - TODO confirm).
distanceMatrix <- distanceMatrix * 100
Commet_distance <- as.dist(distanceMatrix)
hc <- hclust(Commet_distance, method = "average")
dendo_cr3 <- as.dendrogram(hc)

plot_title <- paste0("Simka hierarchical clustering\n", distance_name)

if (use_metadata) {
	# Reorder the per-dataset colour codes with hc$order before applying them
	# to the dendrogram labels and branches.
	colors_numeric_hc <- colors_numeric[hc$order]
	dendo_cr3 %>%
		set("labels_col", colors_numeric_hc) %>%
		set("branches_k_color", colors_numeric_hc) %>%
		plot(main = plot_title, cex = 0.3, xlab = "", sub = "")
	# unique() keeps first-occurrence order on both vectors, so each legend
	# label stays paired with its colour code.
	legend("topright", title = metadata_variable, legend = unique(colors), col = unique(colors_numeric), pch = 16)
} else {
	plot(dendo_cr3, main = plot_title, cex = 0.3, xlab = "", sub = "")
}

# Close the graphics device explicitly so the output file is flushed and
# finalized here rather than relying on interpreter shutdown.
invisible(dev.off())