File: regexdna.scala

package info (click to toggle)
scala 2.7.7.dfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 75,804 kB
  • ctags: 1,852
  • sloc: java: 7,762; xml: 6,608; sh: 1,723; cs: 158; makefile: 9; ansic: 6
file content (80 lines) | stat: -rw-r--r-- 2,224 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/* The Computer Language Shootout
   http://shootout.alioth.debian.org/
   contributed by Isaac Gouy
*/

import java.io.InputStreamReader, java.util.regex._

/** Regex benchmark over a FASTA-formatted DNA input read from standard input.
  *
  * The program prints, in order:
  *  - one line per search pattern: the pattern text, a space, and the number
  *    of matches found in the cleaned sequence;
  *  - an empty line;
  *  - the length of the raw input, the length after removing description
  *    lines and new-lines, and the length after the code substitutions.
  */
object regexdna {

   /** Program entry point.
     *
     * @param args command-line arguments; not used
     */
   def main(args: Array[String]): Unit = {

      val input = readFully()
      val initialLength = input.length

      // remove FASTA sequence descriptions and new-lines
      val sequence = Pattern.compile(">.*\n|\n").matcher(input).replaceAll("")
      val codeLength = sequence.length

      // regex match
      val variants = Array(
         "agggtaaa|tttaccct"
         ,"[cgt]gggtaaa|tttaccc[acg]"
         ,"a[act]ggtaaa|tttacc[agt]t"
         ,"ag[act]gtaaa|tttac[agt]ct"
         ,"agg[act]taaa|ttta[agt]cct"
         ,"aggg[acg]aaa|ttt[cgt]ccct"
         ,"agggt[cgt]aa|tt[acg]accct"
         ,"agggta[cgt]a|t[acg]taccct"
         ,"agggtaa[cgt]|[acg]ttaccct"
         )

      for (v <- variants)
         Console.println(v + " " + countMatches(v, sequence))

      // regex substitution: each single-letter code is replaced by the
      // parenthesised alternation it stands for
      val codes = Array(
             ("B", "(c|g|t)")
            ,("D", "(a|g|t)")
            ,("H", "(a|c|t)")
            ,("K", "(g|t)")
            ,("M", "(a|c)")
            ,("N", "(a|c|g|t)")
            ,("R", "(a|g)")
            ,("S", "(c|g)")
            ,("V", "(a|c|g)")
            ,("W", "(a|t)")
            ,("Y", "(c|t)")
         )

      // Apply the substitutions one after another, in array order, threading
      // the intermediate text through the fold instead of mutating a var.
      val expanded = codes.foldLeft(sequence) {
         case (text, (code, alternative)) =>
            Pattern.compile(code).matcher(text).replaceAll(alternative)
      }

      Console.println("\n" + initialLength + "\n" + codeLength + "\n" + expanded.length)
   }

   /** Counts the successive (non-overlapping) matches of `regex` in `text`.
     *
     * @param regex pattern source, compiled with `java.util.regex.Pattern`
     * @param text  text to scan
     * @return number of times `Matcher.find()` succeeds
     */
   private def countMatches(regex: String, text: String): Int = {
      val matcher = Pattern.compile(regex).matcher(text)
      var count = 0
      while (matcher.find()) count += 1
      count
   }

   /** Reads all of standard input into a single string.
     *
     * Bytes are decoded with the platform default charset (no charset is
     * passed to the reader). The reader, and with it `System.in`, is closed
     * before returning, including when a read fails.
     *
     * @return everything read from `System.in` up to end of stream
     */
   def readFully(): String = {
      val blockSize = 10240
      val block = new Array[Char](blockSize)
      // java.lang.StringBuilder: same append API as StringBuffer, without the
      // synchronization that a single-threaded reader does not need.
      val buffer = new java.lang.StringBuilder(blockSize)
      val r = new InputStreamReader(System.in)

      try {
         var charsRead = r.read(block, 0, blockSize)
         while (charsRead > -1) {
            buffer.append(block, 0, charsRead)
            charsRead = r.read(block, 0, blockSize)
         }
      } finally {
         // always release the stream, even if read() throws
         r.close()
      }

      buffer.toString
   }
}