1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
|
/**
* Title: ProAlign<p>
* Description: <p>
* Copyright: Copyright (c) Ari Loytynoja<p>
* License: GNU GENERAL PUBLIC LICENSE<p>
* @see http://www.gnu.org/copyleft/gpl.html
* Company: ULB<p>
* @author Ari Loytynoja
* @version 1.0
*/
package proalign;
/**
 * Estimates character frequencies from a set of sequences.
 *
 * <p>Every character of every sequence is looked up in the equate alphabet
 * (with {@code '-'} appended) and tallied. The tallies are then folded into
 * the first five slots of the output alphabet using fixed weights: some
 * equate symbols count fully towards one slot, others are split evenly
 * between two, three or four slots (see {@link #CONTRIBUTORS} and
 * {@link #SHARE_DIVISOR}). Finally each slot is divided by the number of
 * counted characters.
 *
 * <p>The weight table reads equate indices 0..16 and writes output slots
 * 0..4, so {@code equateAlphabet} must hold at least 16 characters and
 * {@code alphabet} at least 5; shorter arguments fail with an
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * <p>NOTE(review): the index layout looks like nucleotide ambiguity codes
 * (four bases, two-, three- and four-way ambiguities, an alias of the fourth
 * base, then the gap) - confirm against the caller that supplies the
 * alphabets.
 */
class EstimateFrequencies {

    /**
     * For each output slot, the equate-alphabet indices that contribute to
     * it, listed in the order in which they are summed. The order is kept
     * stable because the sum is accumulated in single precision.
     */
    private static final int[][] CONTRIBUTORS = {
        {0, 4, 6, 9, 10, 12, 13, 14},
        {1, 5, 6, 8, 10, 11, 12, 14},
        {2, 4, 7, 8, 11, 12, 13, 14},
        {3, 5, 7, 9, 10, 11, 13, 14, 15},
        {16}
    };

    /**
     * Number of output slots each equate index is shared between; a tally is
     * divided by this value before being added to a slot. Indices 0-3, 15
     * and 16 count fully, 4-9 are halved, 10-13 are split in thirds and 14
     * is split in quarters.
     */
    private static final float[] SHARE_DIVISOR = {
        1f, 1f, 1f, 1f,
        2f, 2f, 2f, 2f, 2f, 2f,
        3f, 3f, 3f, 3f,
        4f,
        1f,
        1f
    };

    /** Estimated frequency of each character of the output alphabet. */
    double[] charFreqs;

    /**
     * Tallies the characters of {@code dataArray} and stores the resulting
     * frequencies, which are also written to the ProAlign log.
     *
     * <p>Characters that do not occur in the equate alphabet (or are not the
     * appended {@code '-'}) are skipped and excluded from the total; the
     * number of skipped characters is logged. If no character is counted at
     * all, every frequency is reported as {@code 0.0} rather than NaN.
     *
     * @param dataArray      sequences to count characters in
     * @param alphabet       output alphabet; one frequency is produced per
     *                       character, only the first five slots are filled
     * @param equateAlphabet symbols recognised in the data; {@code '-'} is
     *                       appended internally
     */
    EstimateFrequencies(String[] dataArray, String alphabet, String equateAlphabet) {

        ProAlign.log("EstimateFrequencies");

        // Work on a local copy instead of overwriting the parameter.
        String symbols = equateAlphabet + "-";

        int[] eqCounts = new int[symbols.length()];
        int sum = 0;
        int skipped = 0;

        for (int i = 0; i < dataArray.length; i++) {
            String sequence = dataArray[i];
            for (int j = 0; j < sequence.length(); j++) {
                int idx = symbols.indexOf(sequence.charAt(j));
                if (idx < 0) {
                    // indexOf signals "not found" with -1; using that as an
                    // array index would abort the whole estimation.
                    skipped++;
                    continue;
                }
                eqCounts[idx]++;
                sum++;
            }
        }

        if (skipped > 0) {
            ProAlign.log(" skipped " + skipped + " unrecognised characters");
        }

        // Fold the equate tallies into the output slots. Arithmetic stays in
        // float and in table order so results match the historical values.
        float[] counts = new float[alphabet.length()];
        for (int slot = 0; slot < CONTRIBUTORS.length; slot++) {
            float total = 0f;
            int[] sources = CONTRIBUTORS[slot];
            for (int k = 0; k < sources.length; k++) {
                int idx = sources[k];
                total += (float) eqCounts[idx] / SHARE_DIVISOR[idx];
            }
            counts[slot] = total;
        }

        charFreqs = new double[alphabet.length()];
        for (int i = 0; i < alphabet.length(); i++) {
            // Guard the empty-input case: 0.0/0 would otherwise yield NaN.
            charFreqs[i] = (sum == 0) ? 0.0 : (double) counts[i] / sum;
        }

        for (int i = 0; i < alphabet.length(); i++) {
            ProAlign.log(" " + alphabet.charAt(i) + ":" + charFreqs[i]);
        }
    }

    /**
     * Returns the estimated frequencies, indexed like the output alphabet.
     * The internal array itself is returned, not a copy.
     *
     * @return frequency of each output-alphabet character
     */
    double[] getCharacterFrequencies() {
        return charFreqs;
    }
}
|