File: Relations.java

package info (click to toggle)
metastudent 2.0.1-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 95,644 kB
  • sloc: java: 3,287; perl: 2,089; python: 1,421; ruby: 242; sh: 39; makefile: 19
file content (161 lines) | stat: -rwxr-xr-x 4,338 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
package pp2.go;

import java.io.BufferedReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;

/**
 * In-memory lookup of Gene Ontology (GO) term types and ancestor relations.
 * <p>
 * Two tab-separated files are read once, in the constructor:
 * <ul>
 * <li>the terms file: every line with more than four columns maps the GO id
 *     in the 4th column to the type in the 3rd column;</li>
 * <li>the closure file: every line starting with "GO:" yields a
 *     (term, ancestor) pair taken from the 1st and 3rd column.</li>
 * </ul>
 * example constructor:
 * Relations test = new Relations("./../data/deductiveClosureGO.txt","./../data/goterm.txt",true);
 * <p>
 * noCC stands for no Cellular Component (for CAFA, true, since CC is ignored).
 * NOTE(review): the flag is stored but currently has no effect, see
 * {@link #isTypeofInterst(String)}.
 *
 * @author Mark Heron
 */
public class Relations {

	/** Closure file used by {@link #main(String[])} when no argument is given. */
	private static final String DEFAULT_CLOSURE_PATH = "C:\\Dokumente und Einstellungen\\murgs\\Eigene Dateien\\Studium\\9. Semester\\PP2\\ProteinPrediction2\\data\\deductiveClosureGO.txt";

	/** Terms file used by {@link #main(String[])} when fewer than two arguments are given. */
	private static final String DEFAULT_TERMS_PATH = "C:\\Dokumente und Einstellungen\\murgs\\Eigene Dateien\\Studium\\9. Semester\\PP2\\ProteinPrediction2\\data\\goterm.txt";

	/** GO term -> its ancestors ("Ahnen"), as read from the closure file. */
	HashMap<String, String[]> ahnen;
	/** GO term -> type string (e.g. "biological_process"), as read from the terms file. */
	HashMap<String, String> type;
	/** "no Cellular Component" flag as passed to the constructor. */
	boolean noCC;

	/**
	 * Builds the type and ancestor tables from the two given files.
	 * <p>
	 * Errors while reading are printed to stderr and do not abort
	 * construction; the tables then contain whatever was read up to that point.
	 *
	 * @param closure path of the tab-separated closure file (term in column 1, ancestor in column 3)
	 * @param terms   path of the tab-separated terms file (type in column 3, GO id in column 4)
	 * @param noCC    "no Cellular Component" flag, stored in {@link #noCC}
	 */
	public Relations(String closure, String terms, boolean noCC) {

		this.noCC = noCC;

		// create type
		type = new HashMap<String, String>();
		loadTypes(terms);

		// create ancestors/parents (needs the type table to filter terms)
		ahnen = new HashMap<String, String[]>();
		loadAncestors(closure);
	}

	/** Fills {@link #type} from the terms file; lines with four or fewer columns are ignored. */
	private void loadTypes(String terms) {

		BufferedReader input = pp2.tools.Tools.openFile(terms);

		try {
			String line;
			while ((line = input.readLine()) != null) {

				String[] split = line.split("\t");
				if (split.length > 4) {
					type.put(split[3], split[2]);
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// close on the error path as well, not only after a complete read
			closeQuietly(input);
		}
	}

	/**
	 * Fills {@link #ahnen} from the closure file. A pair is kept only when both
	 * terms have a non-empty type and pass {@link #isTypeofInterst(String)}.
	 */
	private void loadAncestors(String closure) {

		HashMap<String, HashSet<String>> collected = new HashMap<String, HashSet<String>>();

		BufferedReader input = pp2.tools.Tools.openFile(closure);

		try {
			String line;
			while ((line = input.readLine()) != null) {

				if (!line.startsWith("GO:")) {
					continue;
				}

				String[] split = line.split("\t");
				if (split.length < 3) {
					// malformed line: skip it instead of letting an
					// ArrayIndexOutOfBoundsException end the whole load
					continue;
				}

				String key = split[0];
				String value = split[2];

				if (getType(key).length() == 0 || getType(value).length() == 0) {
					continue;
				}
				if (!isTypeofInterst(key) || !isTypeofInterst(value)) {
					continue;
				}

				HashSet<String> ancestors = collected.get(key);
				if (ancestors == null) {
					ancestors = new HashSet<String>();
					collected.put(key, ancestors);
				}
				ancestors.add(value);
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeQuietly(input);
		}

		// freeze the collected sets into arrays for lookup
		for (String key : collected.keySet()) {
			ahnen.put(key, collected.get(key).toArray(new String[0]));
		}
	}

	/** Closes the reader if it is non-null; a failing close is only reported on stderr. */
	private static void closeQuietly(BufferedReader reader) {
		if (reader == null) {
			return;
		}
		try {
			reader.close();
		} catch (java.io.IOException e) {
			e.printStackTrace();
		}
	}



	/**
	 * Returns the ancestors recorded for a GO term.
	 *
	 * @param goterm GO term id
	 * @return the stored ancestor array (not a copy), or an empty array if the term is unknown
	 */
	public String[] getAncestors(String goterm) {
		String[] ancestors = ahnen.get(goterm);
		if (ancestors == null) {
			return new String[0];
		}
		return ancestors;
	}



	/**
	 * Returns the type recorded for a GO term.
	 *
	 * @param goterm GO term id
	 * @return the type string from the terms file, or "" if the term is unknown
	 */
	public String getType(String goterm) {
		String termType = type.get(goterm);
		if (termType == null) {
			return "";
		}
		return termType;
	}

	/**
	 * Tells whether a GO term belongs to one of the accepted types.
	 *
	 * @param goterm GO term id
	 * @return true if the term has no recorded type, or if its type is
	 *         "biological_process", "molecular_function" or "cellular_component"
	 */
	public boolean isTypeofInterst(String goterm) {
		String termType = type.get(goterm);
		if (termType == null) {	// only happens for GO:0048220
			return true;		// no idea why it is missing in goterm.txt
		}
		// NOTE(review): "cellular_component" is accepted unconditionally, so the
		// noCC flag does not influence the result. The previous code had an extra
		// "!noCC && cellular_component" branch that could never be reached.
		// The runtime behaviour is kept as it was; confirm whether noCC is
		// really meant to be ignored before changing this.
		return termType.equals("biological_process")
				|| termType.equals("molecular_function")
				|| termType.equals("cellular_component");
	}



	/**
	 * For each given term, returns those of its ancestors that were not already
	 * returned for a term earlier in the array.
	 *
	 * @param goterms GO term ids, processed in array order
	 * @return one array per input term (same index); an ancestor appears in the
	 *         array of the first term that has it and in no later one
	 */
	public String[][] getExclusivePaths(String[] goterms) {

		String[][] results = new String[goterms.length][];
		HashSet<String> alreadyDone = new HashSet<String>();

		for (int i = 0; i < goterms.length; i++) {
			HashSet<String> exclusive = new HashSet<String>();

			for (String ancestor : getAncestors(goterms[i])) {
				// add() returns true only the first time an ancestor is seen
				if (alreadyDone.add(ancestor)) {
					exclusive.add(ancestor);
				}
			}
			results[i] = exclusive.toArray(new String[0]);
		}
		return results;
	}



	/**
	 * Small manual test: loads the relations, builds a DAG for a fixed list of
	 * terms and prints nodes, leaves, one ancestor list and the exclusive paths.
	 *
	 * @param args optional: args[0] = closure file, args[1] = terms file;
	 *             missing arguments fall back to the built-in default paths
	 */
	public static void main(String[] args) {

		String closure = args.length > 0 ? args[0] : DEFAULT_CLOSURE_PATH;
		String terms = args.length > 1 ? args[1] : DEFAULT_TERMS_PATH;

		Relations test = new Relations(closure, terms, true);
		String[] tmp = {"GO:999","GO:0005737","GO:0019134","GO:0000287","GO:0003977","GO:0000902","GO:0007047","GO:0009103","GO:0009252","GO:0008360","GO:0046872","GO:0008152"};
		DAG dag = new DAG(test, tmp);
		System.out.println(Arrays.toString(dag.getNodes()));
		System.out.println(Arrays.toString(dag.getLeaves()));
		System.out.println(Arrays.toString(dag.getAncestors("GO:0005737")));
		System.out.println(Arrays.deepToString(test.getExclusivePaths(tmp)));
	}
}