File: BlastHit.java

package info (click to toggle)
metastudent 2.0.1-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 95,644 kB
  • sloc: java: 3,287; perl: 2,089; python: 1,421; ruby: 242; sh: 39; makefile: 19
file content (132 lines) | stat: -rwxr-xr-x 3,076 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package pp2.prediction.knn;


/**
 * Stores a BLAST hit with associated information such as E-value, bit score,
 * alignment statistics, PSI-BLAST iteration and the GO terms attached to the hit.
 *
 * <p>Hits are naturally ordered by ascending E-value (see {@link #compareTo(BlastHit)}).
 * This ordering is not consistent with {@code equals}, which this class does not override.
 *
 * @author Thomas Hopf
 *
 */
public class BlastHit implements Comparable<BlastHit>{

	// FASTA identifier of the hit
	private String hitID;
	
	// list of all associated go terms
	private String[] goTerms;
	
	// all measures of the hit
	private double eValue;
	private double bitScore;
	private int length;
	
	private int sequenceIdentity;
	private int sequenceIdentityLength;
	private int gaps;
	private int gapsLength;
	
	// PSI-Blast iteration in which the hit was found
	private int iteration;
	
	// holds which ontologies are supported by this hit (important for calculating normalization values);
	// allocated here so that it exists for every instance, including instances created by the
	// legacy constructor and instances whose hit line could not be parsed
	private final boolean[] supports = new boolean[PredictionTarget.NUM_ONTOLOGIES];
	
	/**
	 * Creates a hit from one tab-separated hit line.
	 *
	 * <p>The columns are read in this order: hit identifier, comma-separated GO terms,
	 * E-value, bit score, length, identities as {@code n/m}, gaps as {@code n/m}
	 * (or the single value {@code 0}), and the PSI-BLAST iteration.
	 *
	 * <p>Parsing is best-effort: if the line cannot be parsed, the problem is reported on
	 * {@code System.err} and no exception is propagated. Fields that had not been parsed
	 * at the point of failure keep their default values ({@code null} or {@code 0}).
	 *
	 * @param hitLine tab-separated description of the hit
	 */
	public BlastHit(String hitLine)
	{
		try {
			String[] split = hitLine.split("\t");
			
			// extract all information from the hitLine
			hitID = split[0];
			goTerms = split[1].split(",");
			eValue = parseEValue(split[2]);
			bitScore = Double.parseDouble(split[3]);
			length = Integer.parseInt(split[4]);
			
			String[] identityParts = split[5].split("/");
			sequenceIdentity = Integer.parseInt(identityParts[0]);
			sequenceIdentityLength = Integer.parseInt(identityParts[1]);
			
			if(split[6].equals("0"))
			{
				// no gaps reported: the gap statistics refer to the full length
				gaps = 0;
				gapsLength = length;
			}
			else
			{
				String[] gapParts = split[6].split("/");
				gaps = Integer.parseInt(gapParts[0]);
				gapsLength = Integer.parseInt(gapParts[1]);
			}
			iteration = Integer.parseInt(split[7]);
		} catch(Exception e) {
			// deliberate best-effort: report the malformed line and keep going
			System.err.println("an error occurred while parsing a blast hit line:");
			System.err.println(hitLine);
			e.printStackTrace();
		}
	}
	
	/**
	 * Creates a hit that only carries GO terms and an E-value; all other measures keep
	 * their default values.
	 *
	 * @param goTerms comma-separated list of GO terms
	 * @param eValue E-value of the hit
	 * @deprecated use {@link #BlastHit(String)} with a complete hit line instead
	 */
	@Deprecated
	public BlastHit(String goTerms, double eValue)
	{
		this.goTerms = goTerms.split(",");
		this.eValue = eValue;
	}
	
	/**
	 * Parses an E-value column.
	 *
	 * <p>If the E-value has a format like {@code e-117} (no mantissa), a {@code "1"} is
	 * prepended to avoid a {@link NumberFormatException}.
	 *
	 * @param raw textual E-value
	 * @return the parsed E-value
	 * @throws NumberFormatException if the text is not a parsable number
	 */
	private static double parseEValue(String raw)
	{
		if(raw.startsWith("e") || raw.startsWith("E"))
			return Double.parseDouble("1" + raw);
		return Double.parseDouble(raw);
	}
	
	/**
	 * @return the E-value of this hit
	 */
	public double getEValue()
	{
		return eValue;
	}
	
	/**
	 * @return the GO terms of this hit (the internal array, not a copy); {@code null} if
	 *         the hit line could not be parsed far enough to read them
	 */
	public String[] getGoTerms()
	{
		return goTerms;
	}
	
	/**
	 * @return the bit score of this hit
	 */
	public double getBitScore()
	{
		return bitScore;
	}
	
	/**
	 * @return the FASTA identifier of this hit
	 */
	public String getHitID()
	{
		return hitID;
	}
	
	/**
	 * Tells whether support for the given ontology has been activated for this hit.
	 *
	 * @param ontologyType index of the ontology (one of the {@code PredictionTarget} ontology constants)
	 * @return {@code true} if {@link #activateOntologySupport(String)} was called for that ontology
	 */
	public boolean supports(int ontologyType) {
		return supports[ontologyType];
	}

	/**
	 * Marks this hit as supporting the given ontology.
	 *
	 * <p>{@code "molecular_function"} and {@code "biological_process"} are matched
	 * explicitly; every other value is treated as the cellular component ontology.
	 *
	 * @param ontology name of the ontology
	 */
	public void activateOntologySupport(String ontology)
	{
		if(ontology.equals("molecular_function"))
			supports[PredictionTarget.MOLECULAR_FUNCTION] = true;
		else if(ontology.equals("biological_process"))
			supports[PredictionTarget.BIOLOGICAL_PROCESS] = true;
		else
			supports[PredictionTarget.CELLULAR_COMPONENT] = true;
	}
	
	/**
	 * Orders hits by ascending E-value.
	 *
	 * <p>Uses {@link Double#compare(double, double)} so that the ordering is total even
	 * if an E-value is NaN (NaN sorts after all other values).
	 *
	 * @param o the hit to compare with
	 * @return a negative value, zero or a positive value if this hit's E-value is smaller
	 *         than, equal to or greater than the other hit's E-value
	 */
	@Override
	public int compareTo(BlastHit o) {
		return Double.compare(this.eValue, o.eValue);
	}

}