1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
|
#!/usr/bin/Rscript
# Copyright (c) 2014,
# Mathias Kuhring, KuhringM@rki.de, Robert Koch Institute, Germany,
# All rights reserved. For details, please note the license.txt.
# surankco-score: alignment of contigs (FASTA format) and reference genomes
# (FASTA format) and score calculation
# get script path
args <- commandArgs(trailingOnly = FALSE)
script.arg <- "--file="
script.name <- sub(script.arg, "", args[grep(script.arg, args)])
script.path <- dirname(script.name)
# testing/debugging
# args <- c("--directory=data")
# script.path <- getwd()
DEBUGGING <- FALSE
# sources and libraries
source(paste('/usr/lib/R/site-library/surankco', '/parameter.R', sep=""))
source(paste('/usr/lib/R/site-library/surankco', '/scores.R', sep=""))
source(paste('/usr/lib/R/site-library/surankco', '/expofits.R', sep=""))
source(paste('/usr/lib/R/site-library/surankco', '/import.R', sep=""))
loadPackages(c("optparse","MASS"), quietly=TRUE)
# parsing parameter
cat("prepare files\n")
parameters <- parseSurankcoScore()
files <- parameters$files
if (DEBUGGING){
print(args)
print(parameters)
print(files)
}
files$scores <- vector(mode="character", length=nrow(files))
# external processes
cat("align and score contigs: ")
for (i in 1:nrow(files)){
cat(paste0(i, " "))
input.file.assembly <- files[i, parameters$assembly.suffix]
input.file.reference <- files[i, parameters$reference.suffix]
output.file.psl <- paste(rownames(files)[i], ".psl.tmp", sep="")
output.file.filter <- paste(rownames(files)[i], ".filter.tmp", sep="")
output.file.pretty <- paste(rownames(files)[i], ".pretty.tmp", sep="")
output.file.scores <- paste(rownames(files)[i], ".scores.tmp", sep="")
files$scores[i] <- output.file.scores
# build alignments
blat.call <- paste(paste("blat", sep=""),
input.file.reference, input.file.assembly,
output.file.psl, "-minIdentity=0")
if (DEBUGGING){
print(blat.call)
}
if(code <- system(blat.call, ignore.stdout=TRUE)){
complainAndStop("blat not successfully executed", code)
}
# filter alignments
filter.call <- paste(paste('/usr/lib/R/site-library/surankco', "/pslMatchFilter", sep=""),
output.file.psl, output.file.filter)
if (DEBUGGING){
print(filter.call)
}
if(code <- system(filter.call, ignore.stdout=FALSE)){
complainAndStop("pslMatchFilter not successfully executed", code)
}
# extract alignments from psl
pretty.call <- paste(paste("pslPretty", sep=""),
output.file.filter, input.file.reference,
input.file.assembly, output.file.pretty, "-long")
if (DEBUGGING){
print(pretty.call)
}
if(code <- system(pretty.call, ignore.stdout=FALSE)){
complainAndStop("pslPretty not successfully executed", code)
}
# calculate scores in Java and export data for R
java.call <- paste("java",
paste("-Xms", parameters$memory, "G", sep=""),
paste("-Xmx", parameters$memory, "G", sep=""),
"-cp",
paste("/usr/share/java", "surankco.jar", sep="/"),
"de.rki.ng4.surankco.scoring.Main",
output.file.pretty,
output.file.scores)
if (DEBUGGING){
print(java.call)
}
if(code <- system(java.call, ignore.stdout=FALSE)){
complainAndStop("java not successfully executed", code)
}
if (!DEBUGGING){
unlink(c(output.file.psl, output.file.filter, output.file.pretty))
}
}
cat("\n")
# import Java score
cat("finalize scores\n")
scores.raw <- importScores(files$scores,
files[ ,parameters$assembly.suffix],
files[ ,parameters$reference.suffix])
# post process scores
# remove redundant and highly correlating scores
# (???could be moved to training/prediction to export a full set of score???)
scores.final <- correctScores(scores.raw)
# calculate fittings
# print score distributions and threshold suggestions
cat("export histograms\n")
expoFit(do.call(rbind, scores.final), TRUE, parameters$pdf.histograms)
# export score as csv
cat("export scores\n")
for (i in 1:nrow(files)){
output.file.scores <- paste(rownames(files)[i], ".scores.txt", sep="")
write.table(scores.final[[i]], file=output.file.scores,
sep="\t", dec = ".", col.names=TRUE, row.names=FALSE)
}
if (!DEBUGGING){
# unlink tmp filescat
cat("remove temporary files\n")
unlink(files$scores)
}
# done
cat("surankco-score calculations done\n")
|