File: plot_roc.r

package info (click to toggle)
libvcflib 1.0.12%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 70,520 kB
  • sloc: cpp: 39,837; python: 532; perl: 474; ansic: 317; ruby: 295; sh: 254; lisp: 148; makefile: 123; javascript: 94
file content (151 lines) | stat: -rwxr-xr-x 6,014 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env Rscript
# Plot ROC 

require(plyr)
require(ggplot2)
require(pracma)
require(grid)

argv <- commandArgs(trailingOnly = TRUE)

prefix <- gsub("\\s","", argv[1])
print(prefix)
truthset <- argv[2]
print(truthset)
results <- argv[3]
print(results)
xmin <- as.numeric(argv[4])
xmax <- as.numeric(argv[5])
ymin <- as.numeric(argv[6])
ymax <- as.numeric(argv[7])

roc <- read.delim(results)

bests <- ddply(roc, .(set), function(x) { data.frame(best_snps=with(x, min(false_negative_snps + false_positive_snps)), best_snp_threshold=min(subset(x, (false_negative_snps + false_positive_snps) == with(x, min(false_negative_snps + false_positive_snps)))$threshold ), best_indels=with(x, min(false_negative_indels + false_positive_indels)), best_indel_threshold=min(subset(x, (false_negative_indels + false_positive_indels) == with(x, min(false_negative_indels + false_positive_indels)))$threshold )) })

write.table(bests, paste(prefix, ".bests.tsv", sep=""), row.names=FALSE, quote=FALSE, sep="\t")

#abs(trapz(c(1, roc$complexfpr), c(1, roc$complextpr)))

true_snps <- with(subset(roc, set==truthset), max(num_snps))
true_indels <- with(subset(roc, set==truthset), max(num_indels))

# get ROC AUC
auc <- ddply(roc, .(set),
      function(x) {
        data.frame(
                   snp_auc=ifelse(true_snps>0,
                     with(x,
                          abs(trapz(c(1,
                                      false_positive_snps/(false_positive_snps+ max(false_negative_snps + num_snps - false_positive_snps))),
                                    c(max(1- false_negative_snps/true_snps),
                                      1- false_negative_snps/true_snps)))),
                     0),
                   indel_auc=ifelse(true_indels>0,
                     with(x,
                          abs(trapz(c(1,
                                      false_positive_indels/(false_positive_indels+ max(false_negative_indels + num_indels - false_positive_indels))),
                                    c(max(1- false_negative_indels/true_indels),
                                      1- false_negative_indels/true_indels)))),
                     0)
                   )
      }
      )

write.table(auc, paste(prefix, ".auc.tsv", sep=""), row.names=FALSE, quote=FALSE, sep="\t")


rocsnps <- ddply(roc, .(set),
      function(x) {
        data.frame(
                   FPR=
                     with(x,
                          c(1,
                            false_positive_snps/(false_positive_snps+ max(false_negative_snps + num_snps - false_positive_snps)))),
                   TPR=
                      with(x,
                          c(max(1- false_negative_snps/true_snps),
                            1- false_negative_snps/true_snps)),
                   type=as.factor("snps")
                   )
          }
      )

rocindels <- ddply(roc, .(set),
      function(x) {
        data.frame(
                   FPR=
                     with(x,
                          c(1,
                            false_positive_indels/(false_positive_indels+ max(false_negative_indels + num_indels - false_positive_indels)))),
                   TPR=
                      with(x,
                          c(max(1- false_negative_indels/true_indels),
                            1- false_negative_indels/true_indels)),
                   type=as.factor("indels")
                   )
          }
      )


if (FALSE) {
if (true_snps>0) {
  ggplot(subset(roc, set != truthset),
         aes(false_positive_snps/(false_positive_snps+with(subset(roc, set==set), max(false_negative_snps + num_snps - false_positive_snps))),
             1- false_negative_snps/with(subset(roc, set==set), max(false_negative_snps + num_snps - false_positive_snps)),
             group=set,
             color=set)) + scale_x_continuous("false positive rate") + scale_y_continuous("true positive rate") + geom_path() + theme_bw()
            + coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
  ggsave(paste(prefix, ".snps.png", sep=""), height=6, width=9)
}

if (true_indels>0) {
  ggplot(subset(roc, set != truthset),
         aes(false_positive_indels/(false_positive_indels+with(subset(roc, set==set), max(false_negative_indels + num_indels - false_positive_indels))),
             1- false_negative_indels/with(subset(roc, set==set), max(false_negative_indels + num_indels - false_positive_indels)),
             group=set,
             color=set)) + scale_x_continuous("false positive rate") + scale_y_continuous("true positive rate") + geom_path() + theme_bw()
            + coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
  ggsave(paste(prefix, ".indels.png", sep=""), height=6, width=9)
}
}


# new versions
if (true_snps>0) {
  ggplot(subset(rocsnps, set != truthset),
         aes(FPR,
             TPR,
             group=set,
             color=set)) + scale_x_continuous("false positive rate") + scale_y_continuous("true positive rate") + geom_path() + theme_bw() + coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
  ggsave(paste(prefix, ".snps.png", sep=""), height=6, width=9)
}

if (true_indels>0) {
  ggplot(subset(rocindels, set != truthset),
         aes(FPR,
             TPR,
             group=set,
             color=set)) + scale_x_continuous("false positive rate") + scale_y_continuous("true positive rate") + geom_path() + theme_bw() + coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
  ggsave(paste(prefix, ".indels.png", sep=""), height=6, width=9)
}

if (true_indels>0 && true_snps>0) {

(
  ggplot(subset(rbind(rocsnps,rocindels), set != truthset),
         aes(FPR,
             TPR,
             group=set,
             color=set))
    + scale_x_continuous("false positive rate")
    + scale_y_continuous("true positive rate")
    + geom_path()
    + theme_bw()
    + coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
    + facet_grid(type ~ .)
    + theme(panel.margin = unit(1, "lines")) 
)
  ggsave(paste(prefix, ".both.png", sep=""), height=5, width=5)

}