File: KNNKernel.java

package info (click to toggle)
metastudent 2.0.1-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 95,644 kB
  • sloc: java: 3,287; perl: 2,089; python: 1,421; ruby: 242; sh: 39; makefile: 19
file content (128 lines) | stat: -rwxr-xr-x 3,350 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
package pp2.prediction.knn;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.IOException;

import pp2.go.Relations;
import pp2.tools.Tools;

/**
 * Performs all operations necessary to predict a list of targets.
 * <p>
 * Targets and their BLAST hits are read from a target file, and each target is
 * predicted against the GO relations that are loaded once at construction.
 * 
 * @author Thomas Hopf
 *
 */
public class KNNKernel {
	
	// number of quantile lines a distribution file is expected to contain
	private static final int NUM_QUANTILES = 100;
	
	// holds all relations in the GO ontology tree
	private final Relations relations;
	
	/**
	 * Creates a kernel and loads the GO relations it predicts against.
	 * All three arguments are handed unchanged to the {@link Relations} constructor.
	 * 
	 * @param closureFile passed to {@code Relations} (presumably the GO closure file -- confirm against Relations)
	 * @param termFile passed to {@code Relations} (presumably the GO term file -- confirm against Relations)
	 * @param noCellularComponent passed to {@code Relations}
	 */
	public KNNKernel(String closureFile, String termFile, boolean noCellularComponent)
	{
		relations = new Relations(closureFile, termFile, noCellularComponent);
	}
	
	/**
	 * Fetches the next target with all its BLAST hits from the reader.
	 * <p>
	 * Lines are consumed until the end-of-target line "//" is seen. A line starting
	 * with "&gt;" sets the target ID, a line starting with "#" adds one BLAST hit;
	 * all other lines are ignored.
	 * 
	 * @param reader source of the target records
	 * @param eliminateDuplicateHits forwarded to {@code PredictionTarget.addBlastHit}
	 * @return the target terminated by "//", or {@code null} when the input ends
	 *         before another "//" line is found or when reading/parsing fails.
	 *         Note that a trailing record without a closing "//" is therefore dropped.
	 */
	private PredictionTarget getNextTarget(BufferedReader reader, boolean eliminateDuplicateHits)
	{
		PredictionTarget nextTarget = new PredictionTarget();
		
		try {
			String line = null;
			while((line = reader.readLine()) != null)
			{
				// return current target when we hit the "end-of-target"-line "//"
				if(line.equals("//"))
					return nextTarget;
				
				if(line.startsWith(">"))
				{
					// get ID of current target
					nextTarget.setTargetID(line.substring(1));
				}
				else if(line.startsWith("#"))
				{
					// this is one of the BLAST hits in the database
					nextTarget.addBlastHit(line.substring(1), eliminateDuplicateHits);
				}
			}
				
		} catch(Exception e)
		{
			// best effort: the caller treats null as end of input, so say explicitly
			// that the run was cut short instead of only dumping a stack trace
			System.err.println("an error occured while reading the next target; no further targets will be read");
			e.printStackTrace();
		}
		
		return null;
		
	}
	
	/**
	 * Reads the distribution of global quality scores, one number per line.
	 * <p>
	 * At most {@value #NUM_QUANTILES} values are stored. Surplus lines are counted
	 * but not stored, so that a file with too many lines produces the warning
	 * instead of an {@code ArrayIndexOutOfBoundsException}. If the file has fewer
	 * lines, the remaining entries stay {@code 0.0}.
	 * 
	 * @param distributionFile file to read, or {@code null} if no distribution is used
	 * @return the quantile values, or {@code null} if {@code distributionFile} is {@code null}
	 * @throws IOException if reading the file fails
	 * @throws NumberFormatException if a stored line is not a parsable double
	 */
	private double[] readDistribution(String distributionFile) throws IOException
	{
		if(distributionFile == null)
			return null;
		
		double[] distribution = new double[NUM_QUANTILES];
		int numLines = 0;
		
		BufferedReader distReader = Tools.openFile(distributionFile);
		try {
			String line = null;
			while((line = distReader.readLine()) != null)
			{
				if(numLines < NUM_QUANTILES)
					distribution[numLines] = Double.parseDouble(line);
				numLines++;
			}
		} finally
		{
			// close even when a line fails to parse
			closeAndReport(distReader);
		}
		
		if(numLines != NUM_QUANTILES)
			System.err.println("warning: distribution file does not contain exactly 100 quantile lines.");
		
		return distribution;
	}
	
	/**
	 * Closes a resource if it was opened, printing (not propagating) a failure,
	 * so that the remaining resources can still be closed.
	 * 
	 * @param resource resource to close; may be {@code null}
	 */
	private static void closeAndReport(Closeable resource)
	{
		if(resource == null)
			return;
		
		try {
			resource.close();
		} catch(IOException e)
		{
			e.printStackTrace();
		}
	}
	
	/**
	 * Carries out the prediction of all targets in a file containing multiple targets.
	 * <p>
	 * Results are written through two writers opened on {@code outFile + ".naive"}
	 * and {@code outFile + ".leaves"}. A failure while predicting a single target is
	 * reported on stderr and the run continues with the next target. The number of
	 * targets whose prediction completed without an exception is printed to stdout.
	 * All opened files are closed before returning, also when an exception occurs.
	 * 
	 * @param targetFile file with the "//"-separated targets and their BLAST hits
	 * @param outFile base path of the two output files
	 * @param eliminateDuplicateHits forwarded to the target when BLAST hits are added
	 * @param useEValueThreshold forwarded to {@code PredictionTarget.predict}
	 * @param eValueTreshold forwarded to {@code PredictionTarget.predict}
	 * @param k forwarded to {@code PredictionTarget.predict} (presumably the number of neighbours -- confirm)
	 * @param distributionFile file with the quality score quantiles, or {@code null} for none
	 */
	public void predictTargetList(String targetFile, String outFile, boolean eliminateDuplicateHits, boolean useEValueThreshold, double eValueTreshold, int k, String distributionFile)
	{
		BufferedReader in = null;
		BufferedWriter naiveWriter = null;
		BufferedWriter leaveWriter = null;
		
		try {
			in = Tools.openFile(targetFile);
			naiveWriter = Tools.writeToFile(outFile + ".naive");
			leaveWriter = Tools.writeToFile(outFile + ".leaves");

			// read the distribution of global quality scores
			double[] distribution = readDistribution(distributionFile);
			
			// the current target for which to perform a prediction
			PredictionTarget target = null;
			
			int numPredicted = 0;
			// iterate over all targets
			while((target = getNextTarget(in, eliminateDuplicateHits)) != null)
			{
				// skip double // error in blast output file
				if(target.getTargetID() == null)
					continue;
				
				try {
					target.predict(relations, useEValueThreshold, eValueTreshold, k, naiveWriter, leaveWriter, distribution);
					// only count targets whose prediction actually went through
					numPredicted++;
				} catch(Exception e)
				{
					System.err.println("an error occured while predicting target " + target.getTargetID());
					e.printStackTrace();
				}
			}
			
			System.out.println("   predicted " + numPredicted + " sequences.");
		} catch(IOException e)
		{
			e.printStackTrace();
		} finally
		{
			// release the files on every path; closing the writers also flushes them
			closeAndReport(in);
			closeAndReport(naiveWriter);
			closeAndReport(leaveWriter);
		}
		
		
	}
	

}