1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
|
package pp2.prediction.knn;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import pp2.go.Relations;
import pp2.tools.Tools;
/**
 * Performs all operations necessary to predict a list of targets.
 *
 * <p>Targets are read from a text file in which each record consists of a
 * line starting with {@code >} (the target ID), any number of lines starting
 * with {@code #} (one BLAST hit each) and a terminating line {@code //}.
 *
 * @author Thomas Hopf
 *
 */
public class KNNKernel {

    /** Number of quantile values a distribution file is expected to contain. */
    private static final int NUM_QUANTILES = 100;

    // holds all relations in the GO ontology tree
    private final Relations relations;

    /**
     * Creates a kernel and loads the GO relations it predicts against.
     * All three arguments are passed unchanged to the {@link Relations} constructor.
     *
     * @param closureFile passed to {@link Relations}
     * @param termFile passed to {@link Relations}
     * @param noCellularComponent passed to {@link Relations}
     */
    public KNNKernel(String closureFile, String termFile, boolean noCellularComponent)
    {
        relations = new Relations(closureFile, termFile, noCellularComponent);
    }

    /**
     * Fetches the next target with all BLAST hits from the reader.
     *
     * <p>Lines are consumed until the end-of-target line {@code //} is found.
     * Lines that are neither an ID line, a hit line nor the terminator are ignored.
     * A record that is not terminated by {@code //} before the end of the input is
     * discarded. Any exception raised while reading or parsing is printed to stderr
     * and also ends the iteration, because {@code null} is returned in that case too.
     *
     * @param reader source of the target records
     * @param eliminateDuplicateHits passed through to {@code PredictionTarget.addBlastHit}
     * @return the next complete target (its ID is {@code null} if the record had no
     *         ID line), or {@code null} at end of input or after an error
     */
    private PredictionTarget getNextTarget(BufferedReader reader, boolean eliminateDuplicateHits)
    {
        PredictionTarget nextTarget = new PredictionTarget();
        try {
            String line = null;
            while((line = reader.readLine()) != null)
            {
                // return current target when we hit the "end-of-target"-line "//"
                if(line.equals("//"))
                    return nextTarget;

                if(line.startsWith(">"))
                {
                    // get ID of current target
                    nextTarget.setTargetID(line.substring(1));
                }
                else if(line.startsWith("#"))
                {
                    // if line starts with "#", this is one of the BLAST hits in the database
                    nextTarget.addBlastHit(line.substring(1), eliminateDuplicateHits);
                }
            }
        } catch(Exception e)
        {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Reads the distribution of global quality scores, one number per line.
     *
     * <p>Only the first {@value #NUM_QUANTILES} lines are stored; surplus lines are
     * counted but ignored so that an oversized file produces the warning instead of
     * an {@code ArrayIndexOutOfBoundsException}. If the file has fewer lines, the
     * remaining entries stay {@code 0.0}.
     *
     * @param distributionFile file to read the values from
     * @return array of length {@value #NUM_QUANTILES}
     * @throws IOException if reading the file fails
     * @throws NumberFormatException if a stored line is not a parsable number
     */
    private double[] readDistribution(String distributionFile) throws IOException
    {
        double[] distribution = new double[NUM_QUANTILES];
        int numLines = 0;
        BufferedReader distReader = Tools.openFile(distributionFile);
        try {
            String line = null;
            while((line = distReader.readLine()) != null)
            {
                if(numLines < NUM_QUANTILES)
                    distribution[numLines] = Double.parseDouble(line);
                numLines++;
            }
        } finally
        {
            // close the reader even if a line could not be read or parsed
            closeAndReport(distReader);
        }
        if(numLines != NUM_QUANTILES)
            System.err.println("warning: distribution file does not contain exactly 100 quantile lines.");
        return distribution;
    }

    /**
     * Closes the resource if it is not {@code null}. A failure to close is printed
     * to stderr instead of being thrown, so the remaining resources can still be closed.
     *
     * @param resource resource to close, may be {@code null}
     */
    private static void closeAndReport(java.io.Closeable resource)
    {
        if(resource == null)
            return;
        try {
            resource.close();
        } catch(IOException e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Carries out the prediction of all targets in a file containing multiple targets.
     *
     * <p>Results are written through two writers opened on {@code outFile + ".naive"}
     * and {@code outFile + ".leaves"}. A failure while predicting a single target is
     * reported on stderr and does not stop the run; the number of successfully
     * predicted targets is printed to stdout at the end. I/O errors are printed to
     * stderr. The input reader and both writers are closed in every case.
     *
     * @param targetFile file containing the targets and their BLAST hits
     * @param outFile prefix of the two output files
     * @param eliminateDuplicateHits passed through to {@code PredictionTarget.addBlastHit}
     * @param useEValueThreshold passed through to {@code PredictionTarget.predict}
     * @param eValueTreshold passed through to {@code PredictionTarget.predict}
     * @param k passed through to {@code PredictionTarget.predict}
     * @param distributionFile file with the distribution of global quality scores,
     *        or {@code null} to predict without a distribution
     */
    public void predictTargetList(String targetFile, String outFile, boolean eliminateDuplicateHits, boolean useEValueThreshold, double eValueTreshold, int k, String distributionFile)
    {
        BufferedReader in = null;
        BufferedWriter naiveWriter = null;
        BufferedWriter leaveWriter = null;
        try {
            in = Tools.openFile(targetFile);
            naiveWriter = Tools.writeToFile(outFile + ".naive");
            leaveWriter = Tools.writeToFile(outFile + ".leaves");

            // read the distribution of global quality scores
            double[] distribution = null;
            if(distributionFile != null)
                distribution = readDistribution(distributionFile);

            // the current target for which to perform a prediction
            PredictionTarget target = null;
            int numPredicted = 0;

            // iterate over all targets
            while((target = getNextTarget(in, eliminateDuplicateHits)) != null)
            {
                // skip double // error in blast output file
                if(target.getTargetID() == null)
                    continue;

                try {
                    target.predict(relations, useEValueThreshold, eValueTreshold, k, naiveWriter, leaveWriter, distribution);
                    // count only targets whose prediction completed without an error
                    numPredicted++;
                } catch(Exception e)
                {
                    System.err.println("an error occured while predicting target " + target.getTargetID());
                    e.printStackTrace();
                }
            }
            System.out.println(" predicted " + numPredicted + " sequences.");
        } catch(IOException e)
        {
            e.printStackTrace();
        } finally
        {
            // always release the files; closing the writers also flushes buffered output
            closeAndReport(in);
            closeAndReport(naiveWriter);
            closeAndReport(leaveWriter);
        }
    }
}
|