File: surankco-score

package info (click to toggle)
surankco 0.0.r5%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 412 kB
  • sloc: java: 2,760; makefile: 13
file content (151 lines) | stat: -rwxr-xr-x 4,613 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/Rscript

# Copyright (c) 2014,
# Mathias Kuhring, KuhringM@rki.de, Robert Koch Institute, Germany, 
# All rights reserved. For details, please note the license.txt.

# surankco-score: alignment of contigs (FASTA format) and reference genomes 
#                 (FASTA format) and score calculation


# get script path
args <- commandArgs(trailingOnly = FALSE)
script.arg <- "--file="
script.name <- sub(script.arg, "", args[grep(script.arg, args)])
script.path <- dirname(script.name)


# testing/debugging
# args <- c("--directory=data")
# script.path <- getwd()
DEBUGGING <- FALSE


# sources and libraries
source(paste('/usr/lib/R/site-library/surankco', '/parameter.R', sep=""))
source(paste('/usr/lib/R/site-library/surankco', '/scores.R', sep=""))
source(paste('/usr/lib/R/site-library/surankco', '/expofits.R', sep=""))
source(paste('/usr/lib/R/site-library/surankco', '/import.R', sep=""))
loadPackages(c("optparse","MASS"), quietly=TRUE)


# parsing parameter
cat("prepare files\n")
parameters <- parseSurankcoScore()
files <- parameters$files

if (DEBUGGING){
  print(args)
  print(parameters)
  print(files)
}

files$scores <- vector(mode="character", length=nrow(files))

# external processes
cat("align and score contigs: ")
for (i in 1:nrow(files)){
  cat(paste0(i, " "))
  input.file.assembly <- files[i, parameters$assembly.suffix]
  input.file.reference <- files[i, parameters$reference.suffix]
  output.file.psl <- paste(rownames(files)[i], ".psl.tmp", sep="")
  output.file.filter <- paste(rownames(files)[i], ".filter.tmp", sep="")
  output.file.pretty <- paste(rownames(files)[i], ".pretty.tmp", sep="")
  output.file.scores <- paste(rownames(files)[i], ".scores.tmp", sep="")
  files$scores[i] <- output.file.scores
  
  # build alignments
  blat.call <- paste(paste("blat", sep=""),
                     input.file.reference, input.file.assembly,
                     output.file.psl, "-minIdentity=0")
  
  if (DEBUGGING){
    print(blat.call)
  }
  
  if(code <- system(blat.call, ignore.stdout=TRUE)){
    complainAndStop("blat not successfully executed", code)
  }
  
  # filter alignments
  filter.call <- paste(paste('/usr/lib/R/site-library/surankco', "/pslMatchFilter", sep=""), 
                       output.file.psl, output.file.filter)
  
  if (DEBUGGING){
    print(filter.call)
  }
  
  if(code <- system(filter.call, ignore.stdout=FALSE)){
    complainAndStop("pslMatchFilter not successfully executed", code)
  }
  
  # extract alignments from psl
  pretty.call <- paste(paste("pslPretty", sep=""), 
                       output.file.filter, input.file.reference,
                       input.file.assembly, output.file.pretty, "-long")
  
  if (DEBUGGING){
    print(pretty.call)
  }
  
  if(code <- system(pretty.call, ignore.stdout=FALSE)){
    complainAndStop("pslPretty not successfully executed", code)
  }
  
  # calculate scores in Java and export data for R
  java.call <- paste("java", 
                     paste("-Xms", parameters$memory, "G", sep=""), 
                     paste("-Xmx", parameters$memory, "G", sep=""), 
                     "-cp",
                     paste("/usr/share/java", "surankco.jar", sep="/"),
                     "de.rki.ng4.surankco.scoring.Main",
                     output.file.pretty,
                     output.file.scores)
  
  if (DEBUGGING){
    print(java.call)
  }
  
  if(code <- system(java.call, ignore.stdout=FALSE)){
    complainAndStop("java not successfully executed", code)
  }
  
  if (!DEBUGGING){
    unlink(c(output.file.psl, output.file.filter, output.file.pretty))
  }
}
cat("\n")

# import Java score
cat("finalize scores\n")
scores.raw <- importScores(files$scores, 
                           files[ ,parameters$assembly.suffix], 
                           files[ ,parameters$reference.suffix])

# post process scores
# remove redundant and highly correlating scores
# (???could be moved to training/prediction to export a full set of score???)
scores.final <- correctScores(scores.raw)

# calculate fittings
# print score distributions and threshold suggestions
cat("export histograms\n")
expoFit(do.call(rbind, scores.final), TRUE, parameters$pdf.histograms)

# export score as csv
cat("export scores\n")
for (i in 1:nrow(files)){
  output.file.scores <- paste(rownames(files)[i], ".scores.txt", sep="")
  
  write.table(scores.final[[i]], file=output.file.scores, 
              sep="\t", dec = ".", col.names=TRUE, row.names=FALSE)
}

if (!DEBUGGING){
  # unlink tmp filescat
  cat("remove temporary files\n")
  unlink(files$scores)
}

# done
cat("surankco-score calculations done\n")