File: regexdna.java

package info (click to toggle)
groovy2 2.2.2%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: jessie-kfreebsd
  • size: 23,916 kB
  • sloc: java: 136,570; xml: 948; sh: 486; makefile: 67; ansic: 64
file content (62 lines) | stat: -rw-r--r-- 2,628 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

/* The Computer Language Shootout
   http://shootout.alioth.debian.org/
   contributed by Josh Goldfoot
   based on the Nice entry by Isaac Gouy
*/

import java.io.*;
import java.lang.*;
import java.util.regex.*;

/**
 * Reads all of standard input, counts regex matches of a fixed list of
 * patterns in it, expands a fixed set of single-letter codes, and prints a
 * short report to standard output.
 *
 * <p>Report layout: one line per pattern in the form
 * {@code <pattern> <count>}, then an empty line, then three numbers on
 * separate lines: the length of the raw input, the length after stripping,
 * and the length after code expansion. All lengths are counted in Java
 * {@code char}s.
 */
public class regexdna {

    /** Number of chars requested per read while draining the input. */
    private static final int READ_CHUNK_SIZE = 10240;

    /**
     * Text removed before matching: everything from a {@code '>'} up to and
     * including the next newline, and every other newline.
     * NOTE(review): presumably this strips FASTA description lines - confirm
     * against the benchmark's input format.
     */
    private static final String STRIP_REGEX = ">.*\n|\n";

    /** Patterns whose occurrences are counted; each is also used as its report label. */
    private static final String[] VARIANTS = {
        "agggtaaa|tttaccct",
        "[cgt]gggtaaa|tttaccc[acg]",
        "a[act]ggtaaa|tttacc[agt]t",
        "ag[act]gtaaa|tttac[agt]ct",
        "agg[act]taaa|ttta[agt]cct",
        "aggg[acg]aaa|ttt[cgt]ccct",
        "agggt[cgt]aa|tt[acg]accct",
        "agggta[cgt]a|t[acg]taccct",
        "agggtaa[cgt]|[acg]ttaccct"
    };

    /**
     * Pairs of {single-letter code, replacement text}, applied in this order.
     * The replacement texts contain only lower-case letters, parentheses and
     * {@code '|'}: none of them contains an upper-case code (so a later pass
     * never rewrites the output of an earlier one) and none contains
     * {@code '$'} or {@code '\'} (so {@code Matcher.replaceAll} inserts them
     * literally).
     * NOTE(review): these look like IUB ambiguity codes - confirm.
     */
    private static final String[][] CODE_EXPANSIONS = {
        { "B", "(c|g|t)" },
        { "D", "(a|g|t)" },
        { "H", "(a|c|t)" },
        { "K", "(g|t)" },
        { "M", "(a|c)" },
        { "N", "(a|c|g|t)" },
        { "R", "(a|g)" },
        { "S", "(c|g)" },
        { "V", "(a|c|g)" },
        { "W", "(a|t)" },
        { "Y", "(c|t)" }
    };

    /** Creates an instance; all of the program's work is done by {@link #main}. */
    public regexdna() {
    }

    /**
     * Program entry point.
     *
     * <p>Standard input is decoded with the platform default charset. If
     * reading fails, a diagnostic is written to standard error and the method
     * returns without printing any part of the report.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String sequence;
        try {
            sequence = readAll(new BufferedReader(new InputStreamReader(System.in)));
        } catch (IOException e) {
            // Report the failure instead of exiting silently; no partial
            // report is produced because nothing has been printed yet.
            System.err.println("regexdna: failed to read standard input: " + e);
            return;
        }

        int initialLength = sequence.length();
        sequence = Pattern.compile(STRIP_REGEX).matcher(sequence).replaceAll("");
        int codeLength = sequence.length();

        for (int i = 0; i < VARIANTS.length; i++) {
            System.out.println(VARIANTS[i] + " " + countMatches(VARIANTS[i], sequence));
        }

        sequence = expandCodes(sequence);

        System.out.println();
        System.out.println(initialLength);
        System.out.println(codeLength);
        System.out.println(sequence.length());
    }

    /** Drains {@code in} to end-of-stream and returns everything read as one string. */
    private static String readAll(Reader in) throws IOException {
        // StringBuilder: the accumulator never leaves this method, so the
        // synchronization of StringBuffer buys nothing.
        StringBuilder text = new StringBuilder(READ_CHUNK_SIZE);
        char[] chunk = new char[READ_CHUNK_SIZE];
        int charsRead;
        while ((charsRead = in.read(chunk, 0, chunk.length)) != -1) {
            text.append(chunk, 0, charsRead);
        }
        return text.toString();
    }

    /** Returns how many successive, non-overlapping matches of {@code regex} occur in {@code text}. */
    private static int countMatches(String regex, String text) {
        Matcher m = Pattern.compile(regex).matcher(text);
        int count = 0;
        while (m.find()) {
            count++;
        }
        return count;
    }

    /** Applies every entry of {@link #CODE_EXPANSIONS} to {@code text}, in table order. */
    private static String expandCodes(String text) {
        String result = text;
        for (int i = 0; i < CODE_EXPANSIONS.length; i++) {
            String code = CODE_EXPANSIONS[i][0];
            String expansion = CODE_EXPANSIONS[i][1];
            result = Pattern.compile(code).matcher(result).replaceAll(expansion);
        }
        return result;
    }
}