/* The Computer Language Shootout
http://shootout.alioth.debian.org/
contributed by Isaac Gouy
*/
import java.io.InputStreamReader, java.util.regex._
/** Entry point for the regex-dna benchmark program.
  *
  * Reads a FASTA-formatted text from standard input, strips the description
  * lines and line breaks, counts the occurrences of a fixed set of patterns,
  * expands single-letter IUB codes into regex alternations, and prints the
  * per-pattern counts followed by three lengths (raw input, stripped input,
  * expanded input).
  */
object regexdna {

  /** Runs the benchmark against standard input and prints the report to `Console`.
    *
    * @param args command-line arguments; not used
    */
  def main(args: Array[String]): Unit = {
    val input = readFully()
    val initialLength = input.length

    // remove FASTA sequence descriptions and new-lines
    val code = Pattern.compile(">.*\n|\n").matcher(input).replaceAll("")
    val codeLength = code.length

    // regex match: one output line per variant, "<pattern> <count>"
    val variants = Array(
      "agggtaaa|tttaccct",
      "[cgt]gggtaaa|tttaccc[acg]",
      "a[act]ggtaaa|tttacc[agt]t",
      "ag[act]gtaaa|tttac[agt]ct",
      "agg[act]taaa|ttta[agt]cct",
      "aggg[acg]aaa|ttt[cgt]ccct",
      "agggt[cgt]aa|tt[acg]accct",
      "agggta[cgt]a|t[acg]taccct",
      "agggtaa[cgt]|[acg]ttaccct"
    )
    for (v <- variants) {
      Console.println(v + " " + countMatches(v, code))
    }

    // regex substitution: each IUB code is replaced by its alternation,
    // applied one after another in the order listed
    val codes = Array(
      ("B", "(c|g|t)"),
      ("D", "(a|g|t)"),
      ("H", "(a|c|t)"),
      ("K", "(g|t)"),
      ("M", "(a|c)"),
      ("N", "(a|c|g|t)"),
      ("R", "(a|g)"),
      ("S", "(c|g)"),
      ("V", "(a|c|g)"),
      ("W", "(a|t)"),
      ("Y", "(c|t)")
    )
    val expanded = codes.foldLeft(code) { case (acc, (iub, alternative)) =>
      Pattern.compile(iub).matcher(acc).replaceAll(alternative)
    }

    Console.println("\n" + initialLength + "\n" + codeLength + "\n" + expanded.length)
  }

  /** Counts the successive (non-overlapping) matches of `regex` found in `text`. */
  private def countMatches(regex: String, text: String): Int = {
    val m = Pattern.compile(regex).matcher(text)
    var count = 0
    while (m.find()) count += 1
    count
  }

  /** Reads standard input to end-of-stream and returns it as one string.
    *
    * Characters are decoded by a default-constructed `InputStreamReader`, i.e.
    * with the platform default charset. The reader (and therefore `System.in`)
    * is closed before returning, even if a read fails.
    *
    * @return everything that was available on standard input
    */
  def readFully(): String = {
    val blockSize = 10240
    val block = new Array[Char](blockSize)
    // java.lang.StringBuilder: the buffer never leaves this method, so the
    // synchronisation of StringBuffer is unnecessary
    val buffer = new java.lang.StringBuilder(blockSize)
    val r = new InputStreamReader(System.in)
    try {
      var charsRead = r.read(block, 0, blockSize)
      while (charsRead > -1) {
        buffer.append(block, 0, charsRead)
        charsRead = r.read(block, 0, blockSize)
      }
    } finally {
      r.close()
    }
    buffer.toString
  }
}
|