File: add_groundtruth_json.py

package info (click to toggle)
hinge 0.5.0-8
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,972 kB
  • sloc: cpp: 9,480; ansic: 8,826; python: 5,023; sh: 340; makefile: 10
file content (52 lines) | stat: -rw-r--r-- 1,496 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import networkx as nx
import sys
import json

# Usage: add_groundtruth_json.py <graph.graphml> <groundtruth.json> <output.graphml>
# Fail with a readable message instead of a bare IndexError when arguments
# are missing. Extra arguments are ignored.
if len(sys.argv) < 4:
    sys.exit(
        "usage: %s <graph.graphml> <groundtruth.json> <output.graphml>"
        % sys.argv[0]
    )

graphml_file = sys.argv[1]
groundtruth_file = sys.argv[2]
graphml_file_w_groundtruth = sys.argv[3]

g = nx.read_graphml(graphml_file)

# Print a short summary of the loaded graph. nx.info() was removed in
# NetworkX 3.0; where it is missing, fall back to the graph's own string form.
_graph_info = getattr(nx, "info", None)
print(_graph_info(g) if _graph_info is not None else g)

# Ground-truth alignments, loaded as-is from JSON; the code below indexes it
# as read_dict[read_id][i][j].
with open(groundtruth_file) as f:
    read_dict = json.load(f)

# Find the widest string form among the first two fields of every alignment
# record (the two alignment coordinates used later in the script).
max_len = 0
for read, alignments in read_dict.items():
    for aln_info in alignments:
        try:
            coord_a, coord_b = aln_info[0], aln_info[1]
        except (IndexError, KeyError, TypeError):
            # Say which record is malformed before propagating the error.
            sys.stderr.write(
                "malformed alignment record for read %r: %r\n" % (read, aln_info)
            )
            raise
        max_len = max(max_len, len(str(coord_a)), len(str(coord_b)))

# A power of ten with more digits than any coordinate seen above. It is used
# as a multiplier to combine a third record field with a coordinate into one
# number without the two parts overlapping (assuming non-negative integers).
pow_mov = 10 ** (max_len + 1)

# Graph.node was removed in NetworkX 2.4. Graph.nodes supports item access
# from 2.0 on; on 1.x it is a plain method, so fall back to the old dict there.
node_attrs = g.nodes if hasattr(g.nodes, "__getitem__") else g.node

for node in g.nodes():
    # Node names are "<read id>_<rev flag>". A node with rev flag 1 is looked
    # up in the ground truth under the read id with a trailing apostrophe.
    parts = node.split('_')
    nodeid = parts[0]
    rev = int(parts[1])
    if rev == 1:
        nodeid += "'"

    attrs = node_attrs[node]
    alignments = read_dict.get(nodeid)
    if alignments:
        # Only the first alignment record of the read is used.
        aln = alignments[0]
        attrs['chr'] = aln[2]
        # Combine field 2 and the larger coordinate into one sortable number.
        attrs['aln_end'] = pow_mov * aln[2] + max(aln[0], aln[1])
    else:
        # Read missing from the ground truth (or listed with no alignments):
        # mark the node with -1 sentinels.
        attrs['chr'] = -1
        attrs['aln_end'] = -1

nx.write_graphml(g, graphml_file_w_groundtruth)