1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
|
package jasper;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
/**
 * Sparse, row-oriented store of pairwise sketch similarities.
 *
 * Each row corresponds to the node ID of a query organism in the associated
 * {@link SparseTree} and holds one {@link Comparison} for every data line of
 * the input file in which that organism was the query and both organisms
 * are known to the tree.
 */
public class SparseSimilarityMatrix {

    /*--------------------------------------------------------------*/
    /*----------------        Initialization        ----------------*/
    /*--------------------------------------------------------------*/

    /**
     * Builds the matrix from a tab-delimited file of sketch similarity
     * percentages (as produced by SketchCompare).
     *
     * Lines starting with '#' are treated as header lines; the most recent
     * one is split on tabs and retained. Empty lines are skipped. Every other
     * line must have at least three tab-separated columns: query name,
     * reference name and similarity. A comparison is stored only when both
     * names are present in the tree; other lines are parsed but dropped.
     *
     * The file is read in a single pass; the number of rows comes from the
     * tree, not from the file.
     *
     * @param inputFile The file containing pairwise comparisons of each sketch.
     * @param tree_ The tree used to map organism names to node IDs.
     * @throws FileNotFoundException If the input file cannot be opened.
     * @throws IOException If reading the input file fails.
     * @throws NumberFormatException If a similarity column is not a valid double.
     * @throws ArrayIndexOutOfBoundsException If a non-empty data line has fewer
     * than three columns.
     */
    public SparseSimilarityMatrix(String inputFile, SparseTree tree_) throws FileNotFoundException, IOException {
        tree = tree_;
        in = inputFile;

        //Total number of organisms in the tree.
        orgCount = tree.getOrgCount();

        //One row more than the organism count, so a node ID equal to
        //orgCount is still a valid row index.
        sparseMatrix = newRows(orgCount + 1);

        try (BufferedReader br = new BufferedReader(new FileReader(in))) {
            String line;
            while ((line = br.readLine()) != null) {
                //Header line: keep the split columns for later use.
                if (line.startsWith("#")) {
                    header = line.split("\t");
                    continue;
                }

                //Blank lines (e.g. a trailing newline) carry no comparison.
                if (line.isEmpty()) {
                    continue;
                }

                //Column 0 is query name, column 1 is reference name,
                //column 2 is the similarity percentage.
                String[] data = line.split("\t");
                String queryName = data[0];
                String refName = data[1];
                double similarity = Double.parseDouble(data[2]);
                linesProcessed++;

                //Only store comparisons where both organisms are in the tree.
                if (tree.containsName(queryName) && tree.containsName(refName)) {
                    int queryPos = nameToNodeId(queryName);
                    int refPos = nameToNodeId(refName);
                    sparseMatrix[queryPos].add(new Comparison(queryPos, refPos, similarity));
                }
            }
        }
    }

    /**
     * Allocates the row array and fills every slot with an empty list.
     *
     * @param rowCount Number of rows to allocate.
     * @return An array of rowCount empty, independent lists.
     */
    @SuppressWarnings("unchecked") //Generic arrays cannot be created directly; only ArrayList<Comparison> is ever stored.
    private static ArrayList<Comparison>[] newRows(int rowCount) {
        ArrayList<Comparison>[] rows = new ArrayList[rowCount];
        for (int i = 0; i < rows.length; i++) {
            rows[i] = new ArrayList<>();
        }
        return rows;
    }

    /*--------------------------------------------------------------*/
    /*----------------            Methods           ----------------*/
    /*--------------------------------------------------------------*/

    /**
     * Method for taking the node name and returning the node ID value.
     *
     * @param orgName The organism node name (String).
     * @return int The node ID of the organism name taken as input.
     */
    public int nameToNodeId(String orgName) {
        //Get the node associated with the input name.
        TreeNode org = tree.getNode(orgName);

        //The name is expected to be in the tree; only checked when assertions are enabled.
        assert (org != null) : orgName;

        return org.nodeId;
    }

    /**
     * Renders the entire matrix, one row per line, with each comparison
     * followed by a single space.
     * Impractical in cases of large input datasets.
     *
     * @return The text form of every row, including empty ones.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (ArrayList<Comparison> row : sparseMatrix) {
            for (Comparison c : row) {
                sb.append(c).append(' ');
            }
            sb.append('\n');
        }
        return sb.toString();
    }

    //TODO: Provide a working pairwise lookup. The disabled version below is slow
    //and incorrect: it indexes a row by the second organism's node ID, but rows
    //are sparse lists in insertion order, so that index is not a node ID.
//  /**
//   * Returns the similarity of two specified organisms.
//   * Both organisms must have been compared using SketchCompare.
//   *
//   * @param org1 The Name of an organism.
//   * @param org2 The name of a second organism.
//   * @return similarity The Double percentage similarity between the two sketches.
//   */
//  public Comparison getComparison(String org1, String org2) {
//      int orgName1 = nameToNodeId(org1);
//      int orgName2 = nameToNodeId(org2);
//
//      return sparseMatrix[orgName1].get(orgName2);
//  }

    /**
     * Returns the organism count reported by the tree at construction time.
     * Note that the matrix itself has one more row than this value.
     *
     * @return The number of organisms.
     */
    public int getSize() {
        return orgCount;
    }

    /**
     * Returns the row of comparisons whose query is the named organism.
     * The returned list is the internal row itself, not a copy.
     *
     * @param orgName The organism node name; it is looked up in the tree's
     * nodeMap (presumably this fails with a NullPointerException when the
     * name is absent - confirm against SparseTree).
     * @return The list of comparisons stored for that organism's node ID.
     */
    public ArrayList<Comparison> getOrgRow(String orgName) {
        int rowNum = tree.nodeMap.get(orgName).nodeId;
        return sparseMatrix[rowNum];
    }

    /*--------------------------------------------------------------*/
    /*----------------            Fields            ----------------*/
    /*--------------------------------------------------------------*/

    /**
     * A SparseTree object that contains taxonomic information relevant to this matrix.
     */
    final SparseTree tree;

    /**
     * Array of rows, indexed by query node ID; each row lists the
     * comparisons recorded for that query node.
     */
    private final ArrayList<Comparison>[] sparseMatrix;

    /**
     * The number of sketches being analyzed, as reported by the tree.
     */
    private final int orgCount;

    /**
     * ArrayList intended to hold the lines of the input file.
     * Not populated by this class.
     */
    ArrayList<String> lines = new ArrayList<String>();

    /**
     * Tab-split columns of the last header line of the comparison input file,
     * or null if the file had no header line.
     */
    private String[] header;

    /**
     * Input file name.
     */
    private final String in;

    /**
     * Number of non-empty data (non-header) lines parsed from the sketch comparison file.
     */
    private long linesProcessed = 0;
}
|