1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
|
library(ggplot2)
library(scales)
library(grid)
library(gridExtra)
# Filenames
args <- commandArgs(TRUE)
analysisdir <- args[1];
graphsdir <- args[2];
refid <- args[3];
format <- args[4];
roundUp <- function(x,to=10)
{
to*(x%/%to + as.logical(x%%to))
}
types = c("Template", "Complement", "2D");
colours = c("#CF746D", "#91A851", "#68B5B9");
if (format=="png") {
textsize <- c(40)
pointsize <- c(5)
pointalpha <- c(0.5)
pointshape <- c(1)
pointwidth <- c(3)
xvjust <- c(1.2)
yvjust <- c(1.8)
} else {
textsize <- c(14)
pointsize <- c(2)
pointalpha <-c(0.4)
pointshape <- c(1)
pointwidth <- c(1)
xvjust <- c(0.2)
yvjust <- c(0.8)
}
# Plot GC% vs position
data_gc_filename <- paste(analysisdir, "/", refid, "/", refid, "_gc.txt", sep="");
if (file.exists(data_gc_filename)) {
data_gc = read.table(data_gc_filename, col.name=c("Position", "Coverage"))
if (nrow(data_gc) > 1) {
if (format=="png") {
png_gc <- paste(graphsdir, "/", refid, "/", refid, "_gc.png", sep="");
cat("Writing", png_gc, "\n");
png(png_gc, width=1600, height=400)
print(ggplot(data_gc, aes(x=data_gc$Position, y=data_gc$Coverage)) + geom_line(color="black") + ggtitle("GC content") + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("GC %") + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
} else {
pdf_gc <- paste(graphsdir, "/", refid, "/", refid, "_gc.pdf", sep="");
cat("Writing", pdf_gc, "\n");
pdf(pdf_gc, width=16, height=4)
print(ggplot(data_gc, aes(x=data_gc$Position, y=data_gc$Coverage)) + geom_line(color="black") + ggtitle("GC content") + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("GC %") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
} else {
cat("WARNING: No data in ", data_gc_filename, "\n");
}
} else {
cat("WARNING: Couldn't find", data_gc_filename, "\n");
}
listOfDataFrames <- NULL;
count <-c(1);
# Work out longest kmer
cum_maxk <- 0;
maxk <- 0;
for (t in 1:3) {
type = types[t];
data_perfect_cumulative_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.txt", sep="");
if (file.exists(data_perfect_cumulative_filename)) {
cat("Reading", data_perfect_cumulative_filename, "\n");
data_perfect_cumulative = read.table(data_perfect_cumulative_filename, col.name=c("Size", "n", "Perfect"))
cat("Read", nrow(data_perfect_cumulative), "rows\n");
if (nrow(data_perfect_cumulative) > 0) {
for (i in 1:length(data_perfect_cumulative$Perfect)) {
if (data_perfect_cumulative$Size[i] > maxk) {
maxk <- data_perfect_cumulative$Size[i];
}
if (data_perfect_cumulative$Size[i] > cum_maxk) {
if (data_perfect_cumulative$Perfect[i] > 5) {
cum_maxk <- data_perfect_cumulative$Size[i];
}
}
}
} else {
cat("WARNING: No data in ", data_perfect_cumulative_filename, "\n");
}
}
}
maxk <- maxk + 10;
cum_maxk <- roundUp(cum_maxk);
cat("max k", maxk, "\n");
cat("cum_max k", cum_maxk, "\n");
for (t in 1:3) {
type = types[t];
colourcode = colours[t];
# Plot coverage vs position
data_coverage_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_coverage.txt", sep="");
cat("Reading", data_coverage_filename, "\n");
if (file.exists(data_coverage_filename)) {
data_coverage = read.table(data_coverage_filename, col.name=c("Position", "Coverage"))
if (nrow(data_coverage) > 0) {
if (format=="png") {
png_coverage <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_coverage.png", sep="");
cat("Writing", png_coverage, "\n");
png(png_coverage, width=1600, height=400)
print(ggplot(data_coverage, aes(x=data_coverage$Position, y=data_coverage$Coverage)) + geom_line(color=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("Mean coverage") + expand_limits(y = 0) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
} else {
pdf_coverage <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_coverage.pdf", sep="");
cat("Writing", pdf_coverage, "\n");
pdf(pdf_coverage, width=16, height=4)
print(ggplot(data_coverage, aes(x=data_coverage$Position, y=data_coverage$Coverage)) + geom_line(color=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("Mean coverage") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + expand_limits(y = 0) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
} else {
cat("WARNING: No data in ", data_coverage_filename, "\n");
}
} else {
cat("WARNING: Couldn't find", data_coverage_filename, "\n");
}
# Plot % reads with perfect kmer vs kmer size
data_perfect_cumulative_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.txt", sep="");
if (file.exists(data_perfect_cumulative_filename)) {
cat("Reading", data_perfect_cumulative_filename, "\n");
data_perfect_cumulative = read.table(data_perfect_cumulative_filename, col.name=c("Size", "n", "Perfect"))
if (nrow(data_perfect_cumulative) > 0) {
if (format=="png") {
png_perfect_cumulative <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.png", sep="");
cat("Writing", png_perfect_cumulative, "\n");
png(png_perfect_cumulative, width=1200, height=800)
print(ggplot(data_perfect_cumulative, aes(x=data_perfect_cumulative$Size, y=data_perfect_cumulative$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("kmer size") + ylab("% reads with perfect kmer") + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
} else {
pdf_perfect_cumulative <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.pdf", sep="");
cat("Writing", pdf_perfect_cumulative, "\n");
pdf(pdf_perfect_cumulative, width=6, height=4)
print(ggplot(data_perfect_cumulative, aes(x=data_perfect_cumulative$Size, y=data_perfect_cumulative$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("kmer size") + ylab("% reads with perfect kmer") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
} else {
cat("WARNING: No data in ", data_perfect_cumulative_filename, "\n");
}
} else {
cat("WARNING: Couldn't find", data_perfect_cumulative_filename, "\n");
}
# Plot %reads vs best perfect kmer
#data_perfect_best_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.txt", sep="");
#if (format=="png") {
# png_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.png", sep="");
# png(png_perfect_best, width=1200, height=800)
# data_perfect_best = read.table(data_perfect_best_filename, col.name=c("Size", "n", "Perfect"))
# print(ggplot(data_perfect_best, aes(x=data_perfect_best$Size, y=data_perfect_best$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + scale_x_continuous(limits=c(0, 140)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
#} else {
# pdf_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.pdf", sep="");
# pdf(pdf_perfect_best, width=6, height=4)
# data_perfect_best = read.table(data_perfect_best_filename, col.name=c("Size", "n", "Perfect"))
# print(ggplot(data_perfect_best, aes(x=data_perfect_best$Size, y=data_perfect_best$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, 140)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
#}
#garbage <- dev.off()
# ========== Indels files ==========
# Insertions
data_insertions_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_insertions.txt", sep="");
cat("Reading", data_insertions_filename, "\n");
if (file.exists(data_insertions_filename)) {
data_insertions = read.table(data_insertions_filename, col.name=c("Size", "Percent"))
if (nrow(data_insertions) > 0) {
if (format=="png") {
png_insertions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_insertions.png", sep="");
cat("Writing", png_insertions, "\n");
png(png_insertions, width=1200, height=800)
print(ggplot(data_insertions, aes(x=data_insertions$Size, y=data_insertions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Insertion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
} else {
pdf_insertions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_insertions.pdf", sep="");
cat("Writing", pdf_insertions, "\n");
pdf(pdf_insertions, width=6, height=4)
print(ggplot(data_insertions, aes(x=data_insertions$Size, y=data_insertions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Insertion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
} else {
cat("WARNING: No data in ", data_insertions_filename, "\n");
}
} else {
cat("WARNING: Couldn't find", data_insertions_filename, "\n");
}
# Deletions
data_deletions_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_deletions.txt", sep="");
cat("Reading", data_deletions_filename, "\n");
if (file.exists(data_deletions_filename)) {
data_deletions = read.table(data_deletions_filename, col.name=c("Size", "Percent"))
if (nrow(data_deletions) > 0) {
if (format=="png") {
png_deletions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_deletions.png", sep="");
cat("Writing", png_deletions, "\n");
png(png_deletions, width=1200, height=800)
print(ggplot(data_deletions, aes(x=data_deletions$Size, y=data_deletions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Deletion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
} else {
pdf_deletions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_deletions.pdf", sep="");
cat("Writing", pdf_deletions, "\n");
pdf(pdf_deletions, width=6, height=4)
print(ggplot(data_deletions, aes(x=data_deletions$Size, y=data_deletions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Deletion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
} else {
cat("WARNING: No data in ", data_deletions_filename, "\n");
}
} else {
cat("WARNING: Couldn't find", data_deletions_filename, "\n");
}
# ========== Alignments file ==========
input_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_alignments.txt", sep="");
cat("Reading", input_filename, "\n");
if (file.exists(input_filename)) {
data_alignments = read.table(input_filename, header=TRUE);
if (nrow(data_alignments) > 1) {
# Length vs Identity histograms
if (format=="png") {
identity_hist_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_hist.png", sep="")
cat("Writing", identity_hist_png, "\n");
png(identity_hist_png, width=1200, height=800)
print(ggplot(data_alignments, aes(x=data_alignments$QueryPercentIdentity)) + geom_histogram(fill=colourcode) + xlab("Read identity %") +ylab("Count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) )
} else {
identity_hist_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_hist.pdf", sep="")
cat("Writing", identity_hist_pdf, "\n");
pdf(identity_hist_pdf, width=6, height=4)
print(ggplot(data_alignments, aes(x=data_alignments$QueryPercentIdentity)) + geom_histogram(fill=colourcode) + xlab("Read identity %") +ylab("Count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
# GC histogram
if (format=="png") {
identity_hist_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_GC_hist.png", sep="")
cat("Writing", identity_hist_png, "\n");
png(identity_hist_png, width=1200, height=800)
print(ggplot(data_alignments, aes(x=data_alignments$QueryGC)) + geom_histogram(fill=colourcode, binwidth=1) + xlab("GC %") +ylab("Read count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_x_continuous(limits=c(0, 100)) )
} else {
identity_hist_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_GC_hist.pdf", sep="")
cat("Writing", identity_hist_pdf, "\n");
pdf(identity_hist_pdf, width=6, height=4)
print(ggplot(data_alignments, aes(x=data_alignments$QueryGC)) + geom_histogram(fill=colourcode, binwidth = 1) + xlab("GC %") +ylab("Read count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_x_continuous(limits=c(0, 100)))
}
garbage <- dev.off()
# Identity vs Length Scatter plots
if (format=="png") {
identity_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_scatter.png", sep="");
cat("Writing", identity_scatter_pdf, "\n");
png(identity_scatter_pdf, width=1200, height=800)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
#grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
} else {
identity_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_scatter.pdf", sep="");
cat("Writing", identity_scatter_pdf, "\n");
pdf(identity_scatter_pdf, width=6, height=4)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
# Identity vs Length heatmap
if (format=="png") {
identity_heatmap_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap.png", sep="");
cat("Writing", identity_heatmap_png, "\n");
png(identity_heatmap_png, width=1200, height=800)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,2)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue'))
#limits=c(0,50), breaks=seq(0, 40, by=10), colours=rainbow(4)
} else {
identity_heatmap_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap.pdf", sep="");
cat("Writing", identity_heatmap_pdf, "\n");
pdf(identity_heatmap_pdf, width=6, height=4)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,2)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))+ scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue'))
}
garbage <- dev.off()
# Identity vs Length heatmap zoomed
if (format=="png") {
identity_heatmap_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap_zoom.png", sep="");
cat("Writing", identity_heatmap_png, "\n");
png(identity_heatmap_png, width=1200, height=800)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,1)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue')+ scale_y_continuous(limits=c(60, 100)))
#limits=c(0,50), breaks=seq(0, 40, by=10), colours=rainbow(4)
} else {
identity_heatmap_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap_zoom.pdf", sep="");
cat("Writing", identity_heatmap_pdf, "\n");
pdf(identity_heatmap_pdf, width=6, height=4)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,1)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))+ scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue')+ scale_y_continuous(limits=c(60, 100)))
}
garbage <- dev.off()
# Identity boxplot
listOfDataFrames[[count]] <- data.frame(Readset=type, Variable=data_alignments$QueryPercentIdentity);
count <- count + 1;
# Alignment identity vs. Fraction of read aligned scatter plots
if (format=="png") {
aid_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_alignment_identity_scatter.png", sep="");
cat("Writing", aid_scatter_png, "\n");
png(aid_scatter_png, width=1200, height=800)
print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$AlignmentPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) )
#grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
} else {
aid_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_alignment_identity_scatter.pdf", sep="");
cat("Writing", aid_scatter_pdf, "\n");
pdf(aid_scatter_pdf, width=6, height=4)
print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$AlignmentPercentIdentity)) + geom_point(shape=pointshape, alpha=0.4, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
# Query identity vs. Fraction of read aligned scatter plots
if (format=="png") {
qid_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_query_identity_scatter.png", sep="");
cat("Writing", qid_scatter_png, "\n");
png(qid_scatter_png, width=1200, height=800)
print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
#grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
} else {
qid_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_query_identity_scatter.pdf", sep="");
cat("Writing", qid_scatter_pdf, "\n");
pdf(qid_scatter_pdf, width=6, height=4)
print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
# Best perfect sequence vs. length scatters
if (format=="png") {
best_perf_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_scatter.png", sep="");
cat("Writing", best_perf_scatter_png, "\n");
png(best_perf_scatter_png, width=1200, height=800)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
#grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
} else {
best_perf_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_scatter.pdf", sep="");
cat("Writing", best_perf_scatter_pdf, "\n");
pdf(best_perf_scatter_pdf, width=6, height=4)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
# Best perfect sequence vs. length scatters zoomed
if (format=="png") {
best_perf_zoom_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_zoom_scatter.png", sep="");
cat("Writing", best_perf_zoom_scatter_png, "\n");
png(best_perf_zoom_scatter_png, width=1200, height=800)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
#grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
} else {
best_perf_zoom_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_zoom_scatter.pdf", sep="");
cat("Writing", best_perf_zoom_scatter_pdf, "\n");
pdf(best_perf_zoom_scatter_pdf, width=6, height=4)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
# Plot %reads vs best perfect kmer
cat(data_alignments$LongestPerfectKmer);
hdf <- hist(breaks=seq(0,maxk,by=10), x=data_alignments$LongestPerfectKmer, plot=FALSE, right=FALSE); # bins are 0-9, 10-19, 20-29 etc.
hdf$density = hdf$counts/sum(hdf$counts)*100
tdf <- data.frame(Pos=hdf$mids, Counts=hdf$density);
if (format=="png") {
png_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.png", sep="");
cat("Writing", png_perfect_best, "\n");
png(png_perfect_best, width=1200, height=800)
print(ggplot(tdf, aes(Pos, Counts)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
} else {
pdf_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.pdf", sep="");
cat("Writing", pdf_perfect_best, "\n");
pdf(pdf_perfect_best, width=6, height=4)
print(ggplot(tdf, aes(Pos, Counts)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
# Number of perfect 21mers verses length scatter
if (format=="png") {
nk21_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_nk21_vs_length_scatter.png", sep="");
cat("Writing", nk21_scatter_png, "\n");
png(nk21_scatter_png, width=1200, height=800)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$nk21)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
#grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
} else {
nk21_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_nk21_vs_length_scatter.pdf", sep="");
cat("Writing", nk21_scatter_pdf, "\n");
pdf(nk21_scatter_pdf, width=6, height=4)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$nk21)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
# Mean perfect sequence vs. length scatters
#mean_perf_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_mean_perfect_vs_length_scatter.pdf", sep="");
#pdf(mean_perf_scatter_pdf, height=4, width=6)
#print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$MeanPerfectKmer), xlab="Read length") + geom_point(shape=pointshape, alpha=pointalpha) + xlab("Read length") +ylab("Mean perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000)))
#garbage <- dev.off()
# Percentage of read aligned vs read length
if (format=="png") {
output_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_percent_aligned_vs_length_scatter.png", sep="");
cat("Writing", output_png, "\n");
png(output_png, width=1200, height=800)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$PercentQueryAligned)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Percentage of read aligned") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
#grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
} else {
output_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_percent_aligned_vs_length_scatter.pdf", sep="");
cat("Writing", output_pdf, "\n");
pdf(output_pdf, width=6, height=4)
print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$PercentQueryAligned)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Percentage of read aligned") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
}
garbage <- dev.off()
} else {
cat("WARNING: No data in ", input_filename, "\n");
}
} else {
cat("WARNING: Couldn't find", input_filename, "\n");
}
# ========== Kmer file ==========
# Kmer abundance with labels
input_kmers <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_kmers.txt", sep="");
cat("Reading", input_kmers, "\n");
if (file.exists(input_kmers)) {
data_kmers = read.table(input_kmers, header=TRUE);
if (nrow(data_kmers) > 1) {
if (format=="png") {
kmer_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_kmer_scatter.png", sep="");
cat("Writing", kmer_scatter_png, "\n");
png(kmer_scatter_png, width=1200, height=1200)
print(ggplot(data_kmers, aes(x=data_kmers$RefPc, y=data_kmers$ReadPc)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Reference abundance %") +ylab("Reads abundance %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 0.3)) + scale_y_continuous(limits=c(0, 0.3)) + geom_text(aes(label=data_kmers$Kmer), size=4) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) )
#grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
} else {
kmer_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_kmer_scatter.pdf", sep="");
cat("Writing", kmer_scatter_pdf, "\n");
pdf(kmer_scatter_pdf, width=6, height=6)
print(ggplot(data_kmers, aes(x=data_kmers$RefPc, y=data_kmers$ReadPc)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Reference abundance %") +ylab("Reads abundance %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 0.3)) + scale_y_continuous(limits=c(0, 0.3)) + geom_text(aes(label=data_kmers$Kmer), size=1) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) )
}
garbage <- dev.off()
} else {
cat("WARNING: No data in ", input_kmers, "\n");
}
} else {
cat("WARNING: Couldn't find", input_kmers, "\n");
}
}
# Identity boxplot
df <- do.call("rbind", listOfDataFrames);
if (format=="png") {
output_file <- paste(graphsdir, "/", refid, "/", refid, "_query_identity_boxplot.png", sep="");
cat("Writing", output_file, "\n");
png(output_file, width=1200, height=800);
print(ggplot(df, aes(x=Readset, y=Variable, fill=Readset)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %") + scale_y_continuous(limits=c(70, 100)));
} else {
output_file <- paste(graphsdir, "/", refid, "/", refid, "_query_identity_boxplot.pdf", sep="");
cat("Writing", output_file, "\n");
pdf(output_file, width=6, height = 4);
print(ggplot(df, aes(x=Readset, y=Variable, fill=Readset)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %") + scale_y_continuous(limits=c(70, 100)));
}
garbage <- dev.off();
|