1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
|
library(ggplot2)
library(scales)
library(grid)
library(gridExtra)
# Filenames
args <- commandArgs(TRUE)
analysisdir <- args[1];
graphsdir <- args[2];
samplelist <- args[3];
outdir <- args[4];
reference <- args[5];
format <- args[6];
types = c("2D", "Template", "Complement");
colours = c("#68B5B9", "#CF746D", "#91A851");
if (format=="png") {
textsize <- c(40)
pointsize <- c(5)
pointalpha <- c(0.5)
pointshape <- c(1)
pointwidth <- c(3)
xvjust <- c(1.2)
yvjust <- c(1.8)
} else {
textsize <- c(10)
pointsize <- c(2)
pointalpha <-c(0.4)
pointshape <- c(1)
pointwidth <- c(1)
xvjust <- c(0.2)
yvjust <- c(0.8)
}
data_samples = read.table(samplelist, header=TRUE);
imagewidth <- 1 + nrow(data_samples) * 0.5;
# Query identity
for (t in 1:3) {
df <- data.frame();
listOfDataFrames <- NULL;
count <- c(1);
for (i in 1:nrow(data_samples)) {
type = types[t];
sampledir <- data_samples[i, "SampleDir"];
filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
message(filename_data);
if (file.exists(filename_data)) {
data_field = read.table(filename_data, header=TRUE);
message(nrow(data_field));
if (nrow(data_field) > 0) {
thisid <- data_samples[i, "SampleName"];
listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$QueryPercentIdentity);
count <- count + 1;
}
}
}
df <- do.call("rbind", listOfDataFrames);
output_file <- paste(graphsdir, "/", reference, "_", type, "_query_identity.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %"));
garbage <- dev.off();
output_file <- paste(graphsdir, "/", reference, "_", type, "_query_identity_zoom.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %") + scale_y_continuous(limits=c(60, 100)));
garbage <- dev.off();
}
# Query GC
for (t in 1:3) {
df <- data.frame();
listOfDataFrames <- NULL;
count <- c(1);
for (i in 1:nrow(data_samples)) {
type = types[t];
sampledir <- data_samples[i, "SampleDir"];
filename_data <- paste(sampledir, "/", analysisdir ,"/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
if (file.exists(filename_data)) {
data_field = read.table(filename_data, header=TRUE);
message(nrow(data_field));
if (nrow(data_field) > 0) {
thisid <- data_samples[i, "SampleName"];
listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$QueryGC);
count <- count + 1;
}
}
}
df <- do.call("rbind", listOfDataFrames);
output_file <- paste(graphsdir, "/", reference, "_", type, "_query_gc.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read GC %"));
garbage <- dev.off();
}
# Best perfect kmer
for (t in 1:3) {
df <- data.frame();
listOfDataFrames <- NULL;
count <- c(1);
for (i in 1:nrow(data_samples)) {
type = types[t];
sampledir <- data_samples[i, "SampleDir"];
filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
if (file.exists(filename_data)) {
data_field = read.table(filename_data, header=TRUE);
if (nrow(data_field) > 0) {
thisid <- data_samples[i, "SampleName"];
listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$LongestPerfectKmer);
count <- count + 1;
}
}
}
df <- do.call("rbind", listOfDataFrames);
output_file <- paste(graphsdir, "/", reference, "_", type, "_best_perfect_kmer.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Best perfect kmer"));
garbage <- dev.off();
}
#PercentQueryAligned
for (t in 1:3) {
df <- data.frame();
listOfDataFrames <- NULL;
count <- c(1);
for (i in 1:nrow(data_samples)) {
type = types[t];
sampledir <- data_samples[i, "SampleDir"];
filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
if (file.exists(filename_data)) {
data_field = read.table(filename_data, header=TRUE);
if (nrow(data_field) > 0) {
thisid <- data_samples[i, "SampleName"];
listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$PercentQueryAligned);
count <- count + 1;
}
}
}
df <- do.call("rbind", listOfDataFrames);
output_file <- paste(graphsdir, "/", reference, "_", type, "_percent_query_aligned.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("% read aligned"));
garbage <- dev.off();
output_file <- paste(graphsdir, "/", reference, "_", type, "_percent_query_aligned_zoom.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("% read aligned") + scale_y_continuous(limits=c(75, 100)));
garbage <- dev.off();
}
#AlignmentSize
for (t in 1:3) {
df <- data.frame();
listOfDataFrames <- NULL;
count <- c(1);
for (i in 1:nrow(data_samples)) {
type = types[t];
sampledir <- data_samples[i, "SampleDir"];
filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
if (file.exists(filename_data)) {
data_field = read.table(filename_data, header=TRUE);
if (nrow(data_field) > 0) {
thisid <- data_samples[i, "SampleName"];
listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$AlignmentSize);
count <- count + 1;
}
}
}
df <- do.call("rbind", listOfDataFrames);
output_file <- paste(graphsdir, "/", reference, "_", type, "_alignment_size.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Alignment size"));
garbage <- dev.off();
}
#AlignmentPercentIdentity
#for (t in 1:3) {
# df <- data.frame();
# listOfDataFrames <- NULL;
# count <- c(1);
# for (i in 1:nrow(data_samples)) {
# type = types[t];
# sampledir <- data_samples[i, "SampleDir"];
# filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
# if (file.exists(filename_data)) {
# data_field = read.table(filename_data, header=TRUE);
# thisid <- data_samples[i, "SampleName"];
# message(thisid);
# listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$AlignmentPercentIdentity);
# count <- count + 1;
# }
# }
#
# df <- do.call("rbind", listOfDataFrames);
# output_file <- paste(outdir, "/graphs/", reference, "_", type, "_alignment_identity.pdf", sep="");
# message(output_file);
# pdf(output_file, width=imagewidth, height = 4);
# print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Alignment identity %"));
# garbage <- dev.off();
#}
|