File: nanook_plot_comparison_reference.R

package info (click to toggle)
nanook 1.33%2Bdfsg-5
links: PTS, VCS
area: main
in suites: bookworm
size: 2,764 kB
sloc: java: 8,788; perl: 491; sh: 84; python: 42; makefile: 21
file content (215 lines) | stat: -rwxr-xr-x 9,482 bytes
parent folder | download | duplicates (4)
library(ggplot2)
library(scales)
library(grid)
library(gridExtra)

# Filenames
args <- commandArgs(TRUE)
analysisdir <- args[1];
graphsdir <- args[2];
samplelist <- args[3];
outdir <- args[4];
reference <- args[5];
format <- args[6];

types = c("2D", "Template", "Complement");
colours = c("#68B5B9", "#CF746D", "#91A851");

if (format=="png") {
    textsize <- c(40)
    pointsize <- c(5)
    pointalpha <- c(0.5)
    pointshape <- c(1)
    pointwidth <- c(3)
    xvjust <- c(1.2)
    yvjust <- c(1.8)
} else {
    textsize <- c(10)
    pointsize <- c(2)
    pointalpha <-c(0.4)
    pointshape <- c(1)
    pointwidth <- c(1)
    xvjust <- c(0.2)
    yvjust <- c(0.8)
}


data_samples = read.table(samplelist, header=TRUE);

imagewidth <- 1 + nrow(data_samples) * 0.5;

# Query identity
for (t in 1:3) {
    df <- data.frame();
    listOfDataFrames <- NULL;
    count <- c(1);
    for (i in 1:nrow(data_samples)) {
        type = types[t];
        sampledir <- data_samples[i, "SampleDir"];
        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
        message(filename_data);
        if (file.exists(filename_data)) {
            data_field = read.table(filename_data, header=TRUE);
            message(nrow(data_field));
            if (nrow(data_field) > 0) {
                thisid <- data_samples[i, "SampleName"];
                listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$QueryPercentIdentity);
                count <- count + 1;
            }
        }
    }

    df <- do.call("rbind", listOfDataFrames);
    output_file <- paste(graphsdir, "/", reference, "_", type, "_query_identity.pdf", sep="");
    message(output_file);
    pdf(output_file, width=imagewidth, height = 4);
    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %"));
    garbage <- dev.off();

    output_file <- paste(graphsdir, "/", reference, "_", type, "_query_identity_zoom.pdf", sep="");
    message(output_file);
    pdf(output_file, width=imagewidth, height = 4);
    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %") + scale_y_continuous(limits=c(60, 100)));
    garbage <- dev.off();
}

# Query GC
for (t in 1:3) {
    df <- data.frame();
    listOfDataFrames <- NULL;
    count <- c(1);
    for (i in 1:nrow(data_samples)) {
        type = types[t];
        sampledir <- data_samples[i, "SampleDir"];
        filename_data <- paste(sampledir, "/", analysisdir ,"/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
        if (file.exists(filename_data)) {
            data_field = read.table(filename_data, header=TRUE);
            message(nrow(data_field));
            if (nrow(data_field) > 0) {
                thisid <- data_samples[i, "SampleName"];
                listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$QueryGC);
                count <- count + 1;
            }
        }
    }
    
    df <- do.call("rbind", listOfDataFrames);
    output_file <- paste(graphsdir, "/", reference, "_", type, "_query_gc.pdf", sep="");
    message(output_file);
    pdf(output_file, width=imagewidth, height = 4);
    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read GC %"));
    garbage <- dev.off();
}

# Best perfect kmer
for (t in 1:3) {
    df <- data.frame();
    listOfDataFrames <- NULL;
    count <- c(1);
    for (i in 1:nrow(data_samples)) {
        type = types[t];
        sampledir <- data_samples[i, "SampleDir"];
        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
        if (file.exists(filename_data)) {
            data_field = read.table(filename_data, header=TRUE);
            if (nrow(data_field) > 0) {
                thisid <- data_samples[i, "SampleName"];
                listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$LongestPerfectKmer);
                count <- count + 1;
            }
        }
    }
    
    df <- do.call("rbind", listOfDataFrames);
    output_file <- paste(graphsdir, "/", reference, "_", type, "_best_perfect_kmer.pdf", sep="");
    message(output_file);
    pdf(output_file, width=imagewidth, height = 4);
    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Best perfect kmer"));
    garbage <- dev.off();
}

#PercentQueryAligned
for (t in 1:3) {
    df <- data.frame();
    listOfDataFrames <- NULL;
    count <- c(1);
    for (i in 1:nrow(data_samples)) {
        type = types[t];
        sampledir <- data_samples[i, "SampleDir"];
        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
        if (file.exists(filename_data)) {
            data_field = read.table(filename_data, header=TRUE);
            if (nrow(data_field) > 0) {
                thisid <- data_samples[i, "SampleName"];
                listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$PercentQueryAligned);
                count <- count + 1;
            }
        }
    }
    
    df <- do.call("rbind", listOfDataFrames);
    output_file <- paste(graphsdir, "/", reference, "_", type, "_percent_query_aligned.pdf", sep="");
    message(output_file);
    pdf(output_file, width=imagewidth, height = 4);
    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("% read aligned"));
    garbage <- dev.off();

    output_file <- paste(graphsdir, "/", reference, "_", type, "_percent_query_aligned_zoom.pdf", sep="");
    message(output_file);
    pdf(output_file, width=imagewidth, height = 4);
    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("% read aligned") + scale_y_continuous(limits=c(75, 100)));
    garbage <- dev.off();
}

#AlignmentSize
for (t in 1:3) {
    df <- data.frame();
    listOfDataFrames <- NULL;
    count <- c(1);
    for (i in 1:nrow(data_samples)) {
        type = types[t];
        sampledir <- data_samples[i, "SampleDir"];
        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
        if (file.exists(filename_data)) {
            data_field = read.table(filename_data, header=TRUE);
            if (nrow(data_field) > 0) {
                thisid <- data_samples[i, "SampleName"];
                listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$AlignmentSize);
                count <- count + 1;
            }
        }
    }
    
    df <- do.call("rbind", listOfDataFrames);
    output_file <- paste(graphsdir, "/", reference, "_", type, "_alignment_size.pdf", sep="");
    message(output_file);
    pdf(output_file, width=imagewidth, height = 4);
    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Alignment size"));
    garbage <- dev.off();
}

#AlignmentPercentIdentity
#for (t in 1:3) {
#    df <- data.frame();
#    listOfDataFrames <- NULL;
#    count <- c(1);
#    for (i in 1:nrow(data_samples)) {
#        type = types[t];
#        sampledir <- data_samples[i, "SampleDir"];
#        filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
#        if (file.exists(filename_data)) {
#            data_field = read.table(filename_data, header=TRUE);
#            thisid <- data_samples[i, "SampleName"];
#            message(thisid);
#            listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$AlignmentPercentIdentity);
#            count <- count + 1;
#        }
#    }
#
#    df <- do.call("rbind", listOfDataFrames);
#    output_file <- paste(outdir, "/graphs/", reference, "_", type, "_alignment_identity.pdf", sep="");
#    message(output_file);
#    pdf(output_file, width=imagewidth, height = 4);
#    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Alignment identity %"));
#    garbage <- dev.off();
#}