1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
|
package pp2.prediction.knn;
/**
 * Command-line wrapper to apply kNN predictions to a file containing several targets.
 *
 * <p>Example run:
 * <pre>
 * java KNNPredictor -i examples/blast_results_all.blast -o examples/prediction_out1 -k 11
 *     -c data/deductiveClosureGO.txt -t data/goterm.txt
 * </pre>
 *
 * @author Thomas Hopf
 */
public class KNNPredictor {

    /** Usage line printed whenever the command line cannot be interpreted. */
    private static final String USAGE = "java KNNPredictor -i <blast input> -o <output path> -d <distribution> [-k <#neighbors> |-e <eValue threshold>] [-nofilter] [-cc] " +
            "-c <GO closure file> -t <GO term file>";

    /** Minimum number of command-line tokens: the four mandatory file options with their values. */
    private static final int MIN_ARGS = 8;

    /**
     * Parses the command line, echoes the effective settings and runs the prediction.
     *
     * <p>Recognized options:
     * <ul>
     *   <li>{@code -i <file>} hit (BLAST result) input file, mandatory</li>
     *   <li>{@code -o <path>} output path, mandatory</li>
     *   <li>{@code -c <file>} GO closure file, mandatory</li>
     *   <li>{@code -t <file>} GO term file, mandatory</li>
     *   <li>{@code -d <file>} distribution file; passed on as {@code null} when absent</li>
     *   <li>{@code -k <int>} number of neighbors (default 1); switches E-value mode off</li>
     *   <li>{@code -e <double>} E-value threshold (default 1E-10); switches E-value mode on</li>
     *   <li>{@code -nofilter} disables elimination of duplicate hits</li>
     * </ul>
     * If both {@code -k} and {@code -e} are given, the one appearing last decides the mode.
     *
     * <p>The process exits with status -1 when fewer than eight arguments are given, when an
     * option is missing its value, when a numeric value cannot be parsed, or when one of the
     * mandatory files is not specified. An unrecognized argument only prints a warning and the
     * usage line; parsing then continues.
     *
     * @param args command-line arguments as described above
     */
    public static void main(String[] args)
    {
        if (args.length < MIN_ARGS) {
            System.out.println(USAGE);
            System.exit(-1);
        }

        String hitFile = null;
        String outFile = null;
        String distributionFile = null;
        String closureFile = null;
        String termFile = null;
        boolean eliminateDuplicateHits = true;
        boolean useEValueThreshold = false;
        double eValueThreshold = 1E-10;
        int k = 1;
        // Never changed below: the "-cc" handler was disabled in the original code.
        boolean noCellularComponent = false;

        for (int i = 0; i < args.length; i++) {
            String flag = args[i];
            if (flag.equals("-i")) {
                hitFile = optionValue(args, i++);
            } else if (flag.equals("-o")) {
                outFile = optionValue(args, i++);
            } else if (flag.equals("-c")) {
                closureFile = optionValue(args, i++);
            } else if (flag.equals("-t")) {
                termFile = optionValue(args, i++);
            } else if (flag.equals("-d")) {
                distributionFile = optionValue(args, i++);
            } else if (flag.equals("-k")) {
                String raw = optionValue(args, i++);
                try {
                    k = Integer.parseInt(raw);
                } catch (NumberFormatException e) {
                    exitWithUsage("invalid number of neighbors for -k: " + raw);
                }
                useEValueThreshold = false;
            } else if (flag.equals("-e")) {
                String raw = optionValue(args, i++);
                try {
                    eValueThreshold = Double.parseDouble(raw);
                } catch (NumberFormatException e) {
                    exitWithUsage("invalid E-value threshold for -e: " + raw);
                }
                useEValueThreshold = true;
            } else if (flag.equals("-nofilter")) {
                eliminateDuplicateHits = false;
            } else {
                // NOTE(review): the usage line advertises "-cc", but no handler is active,
                // so "-cc" ends up here as well. Confirm the intended behavior.
                System.out.println("unknown argument: " + flag);
                System.out.println(USAGE);
            }
        }

        // NOTE(review): the usage line shows -d as mandatory, yet it is not checked here and
        // the example run omits it; left unchecked to preserve the existing behavior.
        if (hitFile == null || outFile == null || closureFile == null || termFile == null) {
            exitWithUsage("invalid file parameters!");
        }

        System.out.println(" input file: " + hitFile);
        System.out.println(" out path: " + outFile);
        System.out.println(" use E-value: " + useEValueThreshold);
        System.out.println(" k: " + k);
        System.out.println(" E-Value threshold: " + eValueThreshold);
        System.out.println(" eliminate duplicate hits: " + eliminateDuplicateHits);
        System.out.println(" closure file: " + closureFile);
        System.out.println(" term file: " + termFile);
        System.out.println(" distribution file: " + distributionFile);
        System.out.println(" no cellular component: " + noCellularComponent);
        System.out.println();

        KNNKernel knn = new KNNKernel(closureFile, termFile, noCellularComponent);
        knn.predictTargetList(hitFile, outFile, eliminateDuplicateHits, useEValueThreshold,
                eValueThreshold, k, distributionFile);

        /*
         * TODO:
         * X filter duplicate hits (from the different PSI-BLAST iterations)
         * X implement BestHit as 1-NN!
         * X normalization per ontology
         *
         * - why do unweighted and weighted correlate so strongly?
         * - implement exception handling for individual targets
         * X round scores
         */
    }

    /**
     * Returns the token following the option at {@code flagIndex}; terminates the program with
     * a usage message when the option is the last token and therefore has no value.
     */
    private static String optionValue(String[] args, int flagIndex)
    {
        if (flagIndex + 1 >= args.length) {
            exitWithUsage("missing value for option " + args[flagIndex]);
        }
        return args[flagIndex + 1];
    }

    /** Prints {@code message} followed by the usage line and exits with status -1. */
    private static void exitWithUsage(String message)
    {
        System.out.println(message);
        System.out.println(USAGE);
        System.exit(-1);
    }
}
|