File: add_groundtruth.py

package info (click to toggle)
hinge 0.5.0-8
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,972 kB
  • sloc: cpp: 9,480; ansic: 8,826; python: 5,023; sh: 340; makefile: 10
file content (61 lines) | stat: -rwxr-xr-x 1,818 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/python3

import networkx as nx
import sys

def parse_chromosome(argv):
    """Return the optional chromosome filter given as the 4th CLI argument.

    Returns None when the argument is missing or is not an integer; in that
    case every node is labelled with its own chromosome.
    """
    try:
        return int(argv[4])
    except (IndexError, ValueError):
        return None


def load_groundtruth(lines):
    """Parse ground-truth lines into ``{line_number: [start, end, chromosome]}``.

    Each non-blank line is split on whitespace into integers. Fields 2 and 3
    are stored as the alignment start/end and field 1 as the chromosome
    index. The key is the zero-based line number, which is later matched
    against the numeric prefix of the graph node names.

    Blank lines are skipped but still count towards the line numbering.
    Raises ValueError on a non-integer field and IndexError on a line with
    fewer than four fields.
    """
    mapping = {}
    for num, line in enumerate(lines):
        fields = line.split()
        if not fields:
            continue
        m = [int(field) for field in fields]
        mapping[num] = [m[2], m[3], m[1]]
    return mapping


def coordinate_base(mapping):
    """Return the power of ten used to offset coordinates per chromosome.

    The base is ``10 ** (d + 1)`` where ``d`` is the largest number of
    characters in the decimal form of any end coordinate in *mapping*, so
    ``chromosome * base + coordinate`` keeps chromosomes in disjoint ranges.
    An empty mapping yields 10.
    """
    # Measure every entry's end coordinate; looking at a single line would
    # undersize the base whenever that line is not the widest one.
    max_len = max((len(str(entry[1])) for entry in mapping.values()), default=0)
    return 10 ** (max_len + 1)


def annotate_nodes(g, mapping, pow_mov, chromosome_to_consider):
    """Write 'chromosome', 'aln_start' and 'aln_end' onto the nodes of *g*.

    Node names are expected to look like ``<id>_<orientation>`` with both
    parts integers; nodes whose name does not parse are left untouched.

    * Without a filter (``chromosome_to_consider is None``) a node with a
      ground-truth entry gets ``chromosome = entry chromosome + 1``.
    * With a filter, every parseable node gets ``chromosome = 0`` unless its
      entry's chromosome equals the filter, in which case it gets
      ``chromosome + 1``.
    * Nodes with an entry also get ``aln_start``/``aln_end`` set to
      ``chromosome * pow_mov + start/end``.

    *g* must expose ``g.nodes`` as an iterable, subscriptable mapping from
    node to attribute dict (the networkx >= 2.0 node view).
    """
    for node in g.nodes:
        try:
            parts = node.split('_')
            nodeid = int(parts[0])
            # The orientation suffix is parsed only to validate the name:
            # both orientations receive the same coordinates.
            int(parts[1])
        except (AttributeError, IndexError, ValueError):
            continue

        # ``Graph.node`` was removed in networkx 2.4; ``Graph.nodes[...]``
        # is the supported way to reach a node's attribute dict.
        attrs = g.nodes[node]
        entry = mapping.get(nodeid)
        if entry is None:
            # No ground truth for this read: in filter mode it is still
            # marked as "not on the chromosome of interest".
            if chromosome_to_consider is not None:
                attrs['chromosome'] = 0
            continue

        start, end, chromosome = entry
        if chromosome_to_consider is None or chromosome == chromosome_to_consider:
            attrs['chromosome'] = chromosome + 1
        else:
            attrs['chromosome'] = 0

        attrs['aln_end'] = chromosome * pow_mov + end
        attrs['aln_start'] = chromosome * pow_mov + start


def main(argv):
    """Annotate a GraphML graph with ground-truth alignment coordinates.

    Usage: ``add_groundtruth.py IN.graphml GROUNDTRUTH OUT.graphml [CHROMOSOME]``
    """
    graphml_file = argv[1]
    groundtruth_file = argv[2]
    graphml_file_w_groundtruth = argv[3]
    chromosome_to_consider = parse_chromosome(argv)

    g = nx.read_graphml(graphml_file)

    # nx.info() was removed in networkx 3.0; fall back to printing the graph.
    describe = getattr(nx, 'info', None)
    print(describe(g) if describe is not None else g)

    with open(groundtruth_file, 'r') as f:
        mapping_dict = load_groundtruth(f)

    pow_mov = coordinate_base(mapping_dict)
    annotate_nodes(g, mapping_dict, pow_mov, chromosome_to_consider)

    nx.write_graphml(g, graphml_file_w_groundtruth)


if __name__ == '__main__':
    main(sys.argv)