1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
#!/usr/bin/env Rscript
#source("http://bioconductor.org/biocLite.R")
#biocLite("savR")
options(warn=-1)
library(savR)
library(reshape2)
args = commandArgs(trailingOnly=TRUE)
project <- savR(args[1])
################
## Indexing ##
################
#total reads
total_reads<- clusters(project, 1L)
pf_reads<- pfClusters(project, 1L)
################
## Plots ##
################
##
# Data By Cycle
##
extraction<- extractionMetrics((project))
# Data By Cycle, FWHM/All Lanes / Both surfaces / All Bases
reshaped_extraction <- melt(extraction, measure.vars= c("FWHM_A","FWHM_C", "FWHM_T","FWHM_G"))
FWHM<- (aggregate(reshaped_extraction$value, by=list(reshaped_extraction$cycle, reshaped_extraction$variable), FUN=mean))
colnames(FWHM) <- c("Cycles","FWHM", "Value")
FWHM$FWHM<- sub("FWHM_","",FWHM$FWHM)
ggplot(data=FWHM )+
geom_line( aes(x=Cycles , y =Value, color=FWHM)) +
ggtitle("Data by Cycle - FWHM") +
xlab("Cycle") +
ylab("All bases FWHM")
ggsave(paste(args[2], "/FWHM.png", sep=""))
# Data By Cycle,Intensity /All Lanes / Both surfaces / All Bases
reshaped_extraction <- melt(extraction, measure.vars= c("int_A","int_C", "int_T","int_G"))
intensity<- (aggregate(reshaped_extraction$value, by=list(reshaped_extraction$cycle, reshaped_extraction$variable), FUN=mean))
colnames(intensity) <- c("Cycles","Intensity", "Value")
intensity$Intensity<- sub("int_","", intensity$Intensity)
ggplot(data=intensity )+
geom_line( aes(x=Cycles , y =Value, color=Intensity))+
ggtitle("Data By Cycle - Intensity")+
xlab("Cycle")+ylab("All bases intensity")
ggsave(paste(args[2], "/intensity.png", sep=""))
# Data By Cycle, %Base /All Lanes / Both surfaces / All Bases
#
corr<- correctedIntensities(project)
corr[,seq(14,17)]<-round(corr[,seq(14,17)] / apply(corr[,seq(14,17)], 1, sum) *100,2)
corr<- melt(corr, measure.vars= c("num_A","num_C", "num_T","num_G"))
corr<-(aggregate(corr$value, by=list(corr$cycle, corr$variable), FUN=mean))
colnames(corr)<- c("Cycle", "Base", "Perc_Base")
corr$Base<- sub("num_","", corr$Base)
ggplot(corr) +
geom_line(aes(x=Cycle, y= Perc_Base, color=Base)) +
ylab("All Bases % Base") +
ggtitle("Data by Cycle - % Base")
ggsave(paste(args[2], "/base_perc.png" , sep =""))
##
# Data By Lane
##
tiles<- tileMetrics(project)
# Density, Both Surfaces
#pfBoxplot(project) # Generate a boxplot of the numbers of clusters and the number of Illumina pass-filter clusters per tile and lane
dens <-(tiles[which(tiles$code==100 | tiles$code==101 ),])
dens[which(dens$code==100),]$code <- "Raw Clusters"
dens[which(dens$code==101),]$code<- "PF Clusters"
dens$value <- dens$value/1000
ggplot(data = dens , aes(x=lane, y=value, fill=code))+
geom_boxplot() +
ggtitle("Data By Lane - Cluster Density") +
xlab("Lane")+ylab("Cluster Density (K/mm2)")
ggsave(paste(args[2], "/density.png", sep=""))
# Phasing, Both Surfaces, All Bases
phasing_code <- seq(200, (200 + (length(project@reads)-1)*2),2)
phasing <-(tiles[which(tiles$code %in% phasing_code) ,])
for(i in phasing_code){
cat(paste("Read ",((i-200)/2)+1))
phasing[which(phasing$code==i),]$code = paste("Read ",((i-200)/2)+1)
}
ggplot(data = phasing[which(phasing$value>0),] , aes(x=lane, y=value*100, fill=code))+
geom_boxplot() +
ggtitle("Data By Lane - Phasing")+
xlab("Lane")+
ylab("% Phasing")+
scale_x_continuous(breaks = unique(phasing$lane))
ggsave(paste(args[2], "/phasing.png", sep=""))
# Pre-Phasing, Both Surfaces, All Bases
prephasing_code <- seq(201, (201 + (length(project@reads)-1)*2),2)
prephasing <-(tiles[which(tiles$code %in% prephasing_code) ,])
for(i in prephasing_code){
prephasing[which(prephasing$code==i),]$code = paste("Read ",((i-201)/2)+1)
}
ggplot(data = prephasing[which(prephasing$value>0),] , aes(x=lane, y=value*100, fill=code))+
geom_boxplot() +
ggtitle("Data By Lane - Prephasing")+
xlab("Lane")+
ylab("% Prephasing") +
scale_x_continuous(breaks = unique(prephasing$lane))
ggsave(paste(args[2], "/prephasing.png", sep=""))
##
# QScore Heatmap
##
png(paste(args[2], "/qscore_heatmap.png", sep=""), height=1025, width = 2571, res = 200)
qualityHeatmap(project, lane=seq(1,project@layout@lanecount) ,read=c(1,2))+ theme(axis.title.y = element_blank())
dev.off()
qualy<- qualityMetrics(project)
qualy<- data.frame(apply(qualy, 2, as.numeric))
qualy<- melt(qualy, measure.vars= colnames(qualy)[4:ncol(qualy)])
qualy<- aggregate(qualy$value, by=list(qualy$variable), FUN=sum)
colnames(qualy)<- c("QScore","Total")
qualy$Total <- qualy$Total/1000000
qualy$QScore <- as.numeric(qualy$QScore)
ggplot(qualy, aes(x=QScore, y = Total )) +
geom_bar(stat="identity", aes(fill=QScore>=30)) +
ylab("Total (million)") +
geom_vline(aes(xintercept=30), linetype="dashed") +
geom_text(aes(x=35, y=max(Total)-max(Total)*0.1 ,label=(paste("QScore >=30 \n",
round(sum(qualy[which(qualy$QScore>=30),]$Total)/1000,2),
"G \n",
round(sum(qualy[which(qualy$QScore>=30),]$Total)/ sum(qualy$Total)*100,2),
"%")
))) +
ggtitle("QScore Distribution") +
theme(legend.position="none")
ggsave(paste(args[2], "/qscore_distr.png", sep=""))
|