File: KNNPredictor.java

package info (click to toggle)
metastudent 2.0.1-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 95,644 kB
  • sloc: java: 3,287; perl: 2,089; python: 1,421; ruby: 242; sh: 39; makefile: 19
file content (113 lines) | stat: -rwxr-xr-x 3,643 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
package pp2.prediction.knn;


/**
 * command-line wrapper to apply kNN predictions to a file containing several targets
 * @author Thomas Hopf
 *
 */
public class KNNPredictor {

	/** Usage text printed whenever the command line cannot be interpreted. */
	private static final String USAGE = "java KNNPredictor -i <blast input> -o <output path> -d <distribution> [-k <#neighbors> |-e <eValue threshold>] [-nofilter] [-cc] " +
										"-c <GO closure file> -t <GO term file>";

	/**
	 * Parses the command line, echoes the effective settings and runs the kNN
	 * prediction through {@link KNNKernel#predictTargetList}.
	 *
	 * <p>Recognized options:</p>
	 * <ul>
	 *   <li><code>-i</code> hit (BLAST result) input file - required</li>
	 *   <li><code>-o</code> output path - required</li>
	 *   <li><code>-c</code> GO closure file - required</li>
	 *   <li><code>-t</code> GO term file - required</li>
	 *   <li><code>-d</code> distribution file; listed as mandatory in the usage text
	 *       but not enforced here, so <code>null</code> is passed on when it is omitted</li>
	 *   <li><code>-k</code> number of neighbors (default 1); switches E-value thresholding off</li>
	 *   <li><code>-e</code> E-value threshold (default 1E-10); switches E-value thresholding on</li>
	 *   <li><code>-nofilter</code> do not eliminate duplicate hits</li>
	 *   <li><code>-cc</code> accepted for compatibility with the usage text; changes nothing</li>
	 * </ul>
	 *
	 * <p>If <code>-k</code> and <code>-e</code> are both given, the one appearing last
	 * decides whether the E-value threshold is used.</p>
	 *
	 * <p>The program prints the usage text and terminates with exit status -1 when
	 * fewer than 8 arguments are given, when an option is missing its value, when a
	 * numeric value cannot be parsed, or when one of the required files is missing.
	 * Unknown arguments are reported together with the usage text and then ignored.</p>
	 *
	 * @param args command-line arguments as described above
	 */
	public static void main(String[] args)
	{
		// example run: java KNNPredictor -i examples/blast_results_all.blast -o examples/prediction_out1 -k 11 -c data/deductiveClosureGO.txt -t data/goterm.txt

		// the four required options with their values already account for 8 arguments
		if(args.length < 8)
		{
			System.out.println(USAGE);
			System.exit(-1);
		}

		String hitFile = null;
		String outFile = null;
		String distributionFile = null;
		boolean eliminateDuplicateHits = true;
		boolean useEValueThreshold = false;
		double eValueTreshold = 1E-10;
		int k=1;

		String closureFile = null;
		String termFile = null;
		boolean noCellularComponent = false;

		try
		{
			for(int i=0; i<args.length; i++) {

				String option = args[i];

				if(option.equals("-i"))
					hitFile = optionValue(args, ++i, option);
				else if(option.equals("-o"))
					outFile = optionValue(args, ++i, option);
				else if(option.equals("-c"))
					closureFile = optionValue(args, ++i, option);
				else if(option.equals("-t"))
					termFile = optionValue(args, ++i, option);
				else if(option.equals("-d"))
					distributionFile = optionValue(args, ++i, option);
				else if(option.equals("-k"))
				{
					k = Integer.parseInt(optionValue(args, ++i, option));
					useEValueThreshold = false;
				}
				else if(option.equals("-e"))
				{
					eValueTreshold = Double.parseDouble(optionValue(args, ++i, option));
					useEValueThreshold = true;
				}
				else if(option.equals("-nofilter"))
					eliminateDuplicateHits = false;
				else if(option.equals("-cc"))
				{
					// NOTE(review): the usage text advertises -cc, and its previously
					// disabled handler assigned exactly this value, which is also the
					// default. Accepting the flag keeps it from being reported as an
					// unknown argument - confirm the intended semantics.
					noCellularComponent = false;
				}
				else
				{
					// unknown arguments are reported but do not abort the run
					System.out.println("ignoring unknown argument: " + option);
					System.out.println(USAGE);
				}
			}
		}
		catch(NumberFormatException e)
		{
			// -k or -e was followed by something that is not a number
			exitWithUsage("invalid numeric parameter: " + e.getMessage());
		}
		catch(IllegalArgumentException e)
		{
			// an option that needs a value was the last argument
			exitWithUsage(e.getMessage());
		}

		if(hitFile == null || outFile == null || closureFile == null || termFile == null)
		{
			exitWithUsage("invalid file parameters!");
		}

		System.out.println("   input file: " + hitFile);
		System.out.println("   out path: " + outFile);
		System.out.println("   use E-value: " + useEValueThreshold);
		System.out.println("   k: " + k);
		System.out.println("   E-Value threshold: " + eValueTreshold);
		System.out.println("   eliminate duplicate hits: " + eliminateDuplicateHits);
		System.out.println("   closure file: " + closureFile);
		System.out.println("   term file: " + termFile);
		System.out.println("   distribution file: " + distributionFile);
		System.out.println("   no cellular component: " + noCellularComponent);
		System.out.println();

		KNNKernel knn = new KNNKernel(closureFile, termFile, noCellularComponent);
		knn.predictTargetList(hitFile, outFile, eliminateDuplicateHits,useEValueThreshold, eValueTreshold, k, distributionFile);

		/*
		 * TODO (X = done):
		 * X filter duplicate hits (from the different PSI-BLAST iterations)
		 * X implement BestHit as 1-NN
		 * X normalization per ontology
		 *
		 * - why do unweighted and weighted correlate so strongly?
		 * - implement exception handling for individual targets
		 * X round scores
		 */
	}

	/**
	 * Returns the value that follows an option on the command line.
	 *
	 * @param args       the complete argument array
	 * @param valueIndex index at which the option's value is expected
	 * @param option     the option being processed, used in the error message
	 * @return the argument at <code>valueIndex</code>
	 * @throws IllegalArgumentException if the option was the last argument and
	 *         therefore has no value
	 */
	private static String optionValue(String[] args, int valueIndex, String option)
	{
		if(valueIndex >= args.length)
			throw new IllegalArgumentException("missing value for option " + option);
		return args[valueIndex];
	}

	/**
	 * Prints an error message followed by the usage text and terminates the
	 * JVM with exit status -1.
	 *
	 * @param message description of what was wrong with the command line
	 */
	private static void exitWithUsage(String message)
	{
		System.out.println(message);
		System.out.println(USAGE);
		System.exit(-1);
	}
}