File: SimilarityMatrix.java

package info (click to toggle)
bbmap 39.20%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 26,024 kB
  • sloc: java: 312,743; sh: 18,099; python: 5,247; ansic: 2,074; perl: 96; makefile: 39; xml: 38
file content (132 lines) | stat: -rwxr-xr-x 4,058 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package jasper;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/**
 * Reads a tab-delimited file of pairwise comparison results and places the
 * similarity values into a square matrix indexed by query name.
 * 
 * Lines starting with '#' are treated as header lines. Every other non-blank
 * line is expected to hold at least three tab-separated columns:
 * query name, compared name, and a numeric similarity value.
 */
public class SimilarityMatrix {

	//ArrayList that will hold the data (non-header, non-blank) lines of the input file
	ArrayList<String> lines = new ArrayList<String>();
	
	//Set that will hold the names of the organisms being compared in the input file
	//(taken from column 1 of each data line)
	Set<String> nameSet = new HashSet<String>();
	
	/**
	 * Builds a similarity matrix from an input file of similarity percentages
	 * 
	 * @param inputFile The file holding the output of Sketch comparisons, given either
	 * as "key=path" (everything after the first '=' is used as the path) or as a bare path.
	 * @throws FileNotFoundException If the file cannot be opened.
	 * @throws IOException If reading the file fails.
	 * @throws NullPointerException If no file name is present (null, empty, or the literal "null").
	 */
	public SimilarityMatrix(String inputFile) throws FileNotFoundException, IOException {
		
		//Extract the file name from the argument
		in = parseInputPath(inputFile);
		if(in==null) {
			//Same exception type FileReader would raise for a null name, but with a usable message
			throw new NullPointerException("No input file specified in argument: "+inputFile);
		}
		
		//Read in file, remember the header line and collect the data lines
		try (BufferedReader br = new BufferedReader(new FileReader(in))) {
			String line;
			
			//readLine returns null at end of file
			while ((line = br.readLine()) != null) {
				
				if(line.startsWith("#")) {
					//header line: split and keep for later use
					header=line.split("\t");
				} else if(!line.trim().isEmpty()) {
					//blank lines (e.g. a trailing newline) carry no data and are skipped
					String[] data = line.split("\t");
					
					//column 1 should be query names;
					//make sure it isn't a repeat of a header field before adding it to nameSet
					if(!isHeaderField(data[0])) {nameSet.add(data[0]);}
					
					//add line to list of lines
					lines.add(line);
					linesProcessed++;
				}
			}
		}
		
		//NOTE(review): one more row/column than there are names is allocated, as in the
		//original code; the purpose of the extra slot is not evident here - confirm.
		matrix = new double[nameSet.size() + 1][nameSet.size() + 1];
		
		//loop over lines and fill in matrix
		for(String line : lines) {
			fillMatrix(matrix, nameSet, line);
		}
	}
	
	/**
	 * Fill matrix with relationship information of organisms output from sketch comparison.
	 * The row is the position of the query name (column 1) and the column is the position
	 * of the compared name (column 2) in the iteration order of setNames. If either name
	 * is not present in setNames the matrix is left unchanged.
	 * 
	 * @param matrix Matrix of comparison percentage values.
	 * @param setNames Set of names of included organisms.
	 * @param object Line of sketch comparison output file (must be a String with at least
	 * three tab-separated columns, the third parseable as a double).
	 */
	void fillMatrix(double[][] matrix, Set<String> setNames, Object object) {
		//cast line as string
		String stringLine = (String) object;
		
		//split line
		String[] lineData = stringLine.split("\t");
		
		//place both organism names in variables
		//queryName is the query, column 1
		String queryName = lineData[0];
		String altName = lineData[1];
		
		//collect similarity percentage
		double similarity = Double.parseDouble(lineData[2]);
		
		//set positions variables
		int qPos = -1;
		int mPos = -1;
		
		//walk setNames in iteration order and record each organism's position.
		//The two checks are independent so that a self-comparison
		//(queryName equal to altName) lands on the diagonal.
		int i = 0;
		for(String name : setNames) {
			if(name.equals(queryName)) {qPos = i;}
			if(name.equals(altName)) {mPos = i;}
			if(qPos!=-1 && mPos!=-1) {break;}
			i++;
		}
		
		//after finding both name positions, add similarity value to matrix
		if(qPos!=-1 && mPos!=-1) {matrix[qPos][mPos] = similarity;}
	}
	
	/**
	 * Returns the matrix filled in by the constructor.
	 * Row/column i corresponds to the i-th name in the iteration order of nameSet.
	 * 
	 * @return The internal matrix (not a copy).
	 */
	public double[][] getMatrix() {
		return matrix;
	}
	
	/**
	 * Prints the matrix to standard output, one row per line,
	 * with values separated by single spaces.
	 */
	public void showMatrix() {
		for (int i = 0; i < matrix.length; i++) {
			for (int j = 0; j < matrix[i].length; j++) {
				System.out.print(matrix[i][j] + " ");
			}
			System.out.println();
		}
	}
	
	/**
	 * Extracts the file name from a "key=path" argument or a bare path.
	 * 
	 * @param inputFile Raw argument.
	 * @return The path, or null if it is empty or the literal "null" (any case).
	 */
	private static String parseInputPath(String inputFile) {
		int eq = inputFile.indexOf('=');
		String path = (eq<0 ? inputFile : inputFile.substring(eq+1));
		if(path.isEmpty() || path.equalsIgnoreCase("null")) {return null;}
		return path;
	}
	
	/**
	 * @param name Value to look for.
	 * @return true if a header line has been read and one of its fields equals name.
	 */
	private boolean isHeaderField(String name) {
		return header!=null && Arrays.asList(header).contains(name);
	}
	
	//Fields of the most recent header line, or null if none was read
	private String[] header;
	//Path of the input file
	private String in=null;
	//Number of data lines read from the input file
	private long linesProcessed=0;
	//Similarity values; filled by the constructor
	private double[][] matrix;
}