File: coverage_to_regions.py

package info (click to toggle)
freebayes 1.3.9-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 6,980 kB
  • sloc: cpp: 125,778; ansic: 4,581; sh: 1,084; python: 672; asm: 271; javascript: 94; lisp: 85; makefile: 37; perl: 27
file content (50 lines) | stat: -rwxr-xr-x 1,495 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python3

import sys

if len(sys.argv) < 3:
    print("usage: <bamtools_coverage_output {} fasta_index num_regions >regions.bed".format(sys.argv[0]))
    print("Generates regions with even sequencing coverage, provided an input of coverage per-position as")
    print("generated by bamtools coverage.  In other words, generates regions such that the integral of")
    print("coverage is approximately equal for each.  These can be used when variant calling to reduce")
    print("variance in job runtime.")
    exit(1)

lengths = {}
fai = open(sys.argv[1])
for line in fai.readlines():
    c, l = line.split("\t")[:2]
    lengths[c] = int(l)

positions = []
total_coverage = 0
for line in sys.stdin:
    chrom, pos, depth = line.strip().split("\t")
    pos = int(pos)
    depth = int(depth)
    positions.append([chrom, pos, depth])
    total_coverage += depth

bp_per_region = total_coverage / int(sys.argv[2])

lchrom = None
lpos = 0
bp_in_region = 0

for line in positions:
    chrom, pos, depth = line #line.strip().split("\t")
    if lchrom != chrom:
        if lchrom:
            print(lchrom+":"+str(lpos)+"-"+str(lengths[lchrom]))
            lpos = 0
            lchrom = chrom
            bp_in_region = 0
        else:
            lchrom = chrom
    bp_in_region += depth
    if bp_in_region > bp_per_region:
        print(chrom+":"+str(lpos)+"-"+str(pos)) #, pos - lpos
        lpos = pos
        bp_in_region = 0

print(lchrom+":"+str(lpos)+"-"+str(lengths[lchrom]))