File: flatten_json.py

package info (click to toggle)
pizzly 0.37.3%2Bds-10
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 584 kB
  • sloc: cpp: 1,458; python: 66; sh: 29; makefile: 11
file content (51 lines) | stat: -rw-r--r-- 1,492 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/python3

import sys
import json
from collections import OrderedDict

###
### Output columns: geneA.name, geneA.id, geneB.name, geneB.id, paircount, splitcount, transcripts.list

def loadJSON(fn):
    """Load a JSON file and return the value stored under its 'genes' key.

    JSON objects are decoded as ``OrderedDict`` so the key order of the
    file is preserved.

    Args:
        fn: Path of the JSON file to read.

    Returns:
        The value of the top-level ``'genes'`` entry.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the top-level object has no ``'genes'`` key.
    """
    # JSON interchange text is UTF-8; decode explicitly instead of relying
    # on the locale default, which breaks on non-ASCII under e.g. LANG=C.
    with open(fn, encoding="utf-8") as f:
        JJ = json.load(f, object_pairs_hook=OrderedDict)
    return JJ['genes']

def outputGeneTable(fusions, outf, filters = None):
    """Write the gene fusions as a tab-separated table to *outf*.

    A header line is written first, followed by one line per fusion.

    Args:
        fusions: Iterable of mappings, each providing ``'geneA'`` and
            ``'geneB'`` (mappings with ``'name'`` and ``'id'``),
            ``'paircount'``, ``'splitcount'`` and ``'transcripts'`` (an
            iterable of mappings with a ``'fasta_record'`` entry).
        outf: Object with a ``write`` method that receives the text.
        filters: Accepted for interface compatibility; not used.
    """
    columns = (
        "geneA.name",
        "geneA.id",
        "geneB.name",
        "geneB.id",
        "paircount",
        "splitcount",
        "transcripts.list",
    )
    outf.write('\t'.join(columns))
    outf.write('\n')

    for fusion in fusions:
        # Names, ids and fasta records are joined as-is; only the two
        # counts are converted to text.
        row = [
            fusion['geneA']['name'],
            fusion['geneA']['id'],
            fusion['geneB']['name'],
            fusion['geneB']['id'],
            str(fusion['paircount']),
            str(fusion['splitcount']),
            ';'.join([rec['fasta_record'] for rec in fusion['transcripts']]),
        ]
        outf.write('\t'.join(row))
        outf.write('\n')

def usage():
    """Print the command-line help text to standard output."""
    help_lines = (
        "Usage: python3 flatten_json.py fusion.out.json [genetable.txt]",
        "",
        "       outputs a flat table listing all gene fusions, if the output file is not",
        "       specified it prints to standard output",
    )
    for text in help_lines:
        print(text)


if __name__ == "__main__":
    # Command-line entry point: flatten the fusion JSON named by the first
    # argument into a table, written to the file named by the second
    # argument when exactly two arguments are given, otherwise to stdout.
    nargs = len(sys.argv)
    if nargs <= 1:
        usage()
    else:
        infn = sys.argv[1]
        fusions = loadJSON(infn)
        if nargs == 3:
            # The context manager closes the output file even when writing
            # the table raises part-way through.
            with open(sys.argv[2], 'w') as outf:
                outputGeneTable(fusions, outf)
        else:
            outputGeneTable(fusions, sys.stdout)