File: EstimateFrequencies.java

package info (click to toggle)
proalign 0.603-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 492 kB
  • sloc: java: 8,673; sh: 27; makefile: 4
file content (61 lines) | stat: -rw-r--r-- 1,983 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/**
 * Title:        ProAlign<p>
 * Description:  <p>
 * Copyright:    Copyright (c) Ari Loytynoja<p>
 * License:      GNU GENERAL PUBLIC LICENSE<p>
 * @see          http://www.gnu.org/copyleft/gpl.html
 * Company:      ULB<p>
 * @author Ari Loytynoja
 * @version 1.0
 */
package proalign;

/**
 * Estimates the frequency of each character of an alphabet from a set of
 * sequences, spreading the counts of multi-character ("equate") symbols
 * evenly over the characters they contribute to.
 *
 * <p>The symbol table is addressed by fixed positions 0-16, so the
 * constructor only works with a table of that layout. The fractions used
 * (1/2, 1/3, 1/4) are consistent with a nucleotide table ordered
 * <code>ACGTRYMKSWHBVDNU</code> followed by the gap character, feeding an
 * alphabet of the form <code>ACGT-</code>.
 * NOTE(review): that concrete ordering is inferred from the arithmetic only;
 * confirm against the caller that supplies the two alphabets.
 */
class EstimateFrequencies {

    /** Estimated frequency per alphabet character, indexed like <code>alphabet</code>. */
    double[] charFreqs;

    /**
     * Counts the symbols of all sequences and derives the character frequencies.
     *
     * <p>Symbols that do not occur in <code>equateAlphabet</code> (or are not the
     * gap character <code>'-'</code>) are ignored and do not contribute to the
     * total, instead of aborting with an out-of-range array index. If no symbol
     * at all is counted, every frequency is reported as 0.0 rather than NaN.
     *
     * @param dataArray      the sequences to count; must not contain null entries
     * @param alphabet       target alphabet; needs at least five characters because
     *                       result slots 0-4 are written (any further slots stay 0.0)
     * @param equateAlphabet symbol table; needs at least sixteen characters because
     *                       positions 0-16 of the table plus the appended gap
     *                       character are read. The gap lands on position 16 only
     *                       when this string has exactly sixteen characters.
     * @throws ArrayIndexOutOfBoundsException if either alphabet is shorter than
     *         stated above
     */
    EstimateFrequencies(String[] dataArray, String alphabet, String equateAlphabet) {

        ProAlign.log("EstimateFrequencies");

        // The gap character is counted through the same table, in the slot
        // right after the caller-supplied symbols.
        equateAlphabet = equateAlphabet + "-";

        int[] eqCounts = new int[equateAlphabet.length()];

        int sum = 0;
        for (int i = 0; i < dataArray.length; i++) {
            String sequence = dataArray[i];
            for (int j = 0; j < sequence.length(); j++) {
                int index = equateAlphabet.indexOf(sequence.charAt(j));
                if (index < 0) {
                    // indexOf reports -1 for a symbol missing from the table;
                    // skip it so it neither crashes nor skews the total.
                    continue;
                }
                eqCounts[index]++;
                sum++;
            }
        }

        // Accumulate in double so the thirds are not rounded to float precision
        // before the final division.
        double[] counts = new double[alphabet.length()];

        // Each result slot receives its own symbol in full, half of every
        // symbol it shares with one other slot, a third of every symbol it
        // shares with two others, and a quarter of the symbol shared by all four.
        counts[0] = eqCounts[0]
            + (eqCounts[4] + eqCounts[6] + eqCounts[9]) / 2.0
            + (eqCounts[10] + eqCounts[12] + eqCounts[13]) / 3.0
            + eqCounts[14] / 4.0;
        counts[1] = eqCounts[1]
            + (eqCounts[5] + eqCounts[6] + eqCounts[8]) / 2.0
            + (eqCounts[10] + eqCounts[11] + eqCounts[12]) / 3.0
            + eqCounts[14] / 4.0;
        counts[2] = eqCounts[2]
            + (eqCounts[4] + eqCounts[7] + eqCounts[8]) / 2.0
            + (eqCounts[11] + eqCounts[12] + eqCounts[13]) / 3.0
            + eqCounts[14] / 4.0;
        // Position 15 is credited entirely to slot 3
        // (presumably U counted as T; verify against the caller's table).
        counts[3] = eqCounts[3]
            + (eqCounts[5] + eqCounts[7] + eqCounts[9]) / 2.0
            + (eqCounts[10] + eqCounts[11] + eqCounts[13]) / 3.0
            + eqCounts[14] / 4.0
            + eqCounts[15];
        // Position 16 is the appended gap character for a sixteen-symbol table.
        counts[4] = eqCounts[16];

        charFreqs = new double[alphabet.length()];
        for (int i = 0; i < alphabet.length(); i++) {
            // With nothing counted the ratio would be 0/0; report 0.0 instead.
            charFreqs[i] = (sum == 0) ? 0.0 : counts[i] / sum;
        }

        for (int i = 0; i < alphabet.length(); i++) {
            ProAlign.log(" " + alphabet.charAt(i) + ":" + charFreqs[i]);
        }
    }

    /**
     * Returns the estimated frequencies.
     *
     * @return the internal frequency array (not a copy), indexed like the
     *         alphabet passed to the constructor
     */
    double[] getCharacterFrequencies() {
        return charFreqs;
    }
}