File: genwig2.py

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (95 lines) | stat: -rw-r--r-- 2,318 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/python3
import sys
import re
import numpy as np

#regex for cigar
re_split_cigar  = re.compile('(\d+[MDI])(?=[0-9]|$)')

#SAM INDEXES
idx_qname = 0
idx_flag  = 1
idx_rname = 2
idx_pos   = 3
idx_mapq  = 4
idx_cigar = 5
idx_rnext = 6
idx_pnext = 7
idx_tlen  = 8
idx_seq   = 9
idx_qual  = 10

def write_err(msg, exit=False, status=1):
    sys.stderr.write(msg)
    if exit:
        sys.exit(status)

if __name__ == '__main__':
    chrlen   = int(sys.argv[1])
    _chr     = sys.argv[2]
    infile   = sys.argv[3]
    outfile  = sys.argv[4]
    nosingle = int(sys.argv[5])
    minins   = int(sys.argv[6])
    maxins   = int(sys.argv[7]) 

    #open input sam
    if infile == "-":
        insam = sys.stdin

    else:
        try:
            insam = open(infile, 'r')
        except IOError:
            write_err("ERROR: Unable to read input sam file\n", exit=True)

    #get file for writing
    if outfile == "-":
        outwig = sys.stdout
        
    else:
        try:
            outwig = open(outfile, 'w')
        except IOError:
            write_err("ERROR: Unable to open output file\n", exit=True)

    #start generating wig
    wig = np.zeros(chrlen, dtype = np.uint64)

    for row in insam:
        entry = row.split('\t')
        start = int(entry[idx_pos]) - 1
        tlen  = int(entry[idx_tlen])
        atlen = abs(tlen)

        #entry = gen_sam(row)
        #start = entry.pos - 1
        #atlen = abs(entry.tlen)
        if atlen > 0 and atlen >= minins and atlen <= maxins:
            if tlen > 0:
                wig[start:(start + tlen)] += 1
            else:
                continue
        else:
            cigars = re.split(re_split_cigar, entry[idx_cigar])
            cigars = [x for x in cigars if x != '']
            _len   = 0

            for c in cigars:
                if 'M' in c:
                    _len += int(c.split('M')[0])
                elif 'D' in c:
                    _len += int(c.split('D')[0])
                elif 'I' in c:
                    continue
                    _len += int(c.split('I')[0])
                else:
                    continue
            wig[start:(start + _len)] += 1


    outwig.write("variableStep chrom=" + _chr + "\n")
    for i in range(0, chrlen):
        outwig.write(str(i + 1) + "\t" + str(wig[i]) + "\n")