File: bed_build_windows.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (56 lines) | stat: -rwxr-xr-x 1,620 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/python3

"""
Build windows of length `window_size` over the sequences defined by
`len_file` excluding regions in `gap_file`.

After removing the gaps, windows of exactly `window_size` units will be
placed in the remaining regions, with the extra space evenly placed
between the windows.

`len_file` is LEN format (name length) and `gap_file is BED (name start stop).

usage: %prog len_file gap_file window_size
"""

import sys

from bx.bitset_builders import binned_bitsets_from_file


def main():
    region_fname, exclude_fname, window_size = sys.argv[1], sys.argv[2], int(sys.argv[3])
    exclude_bitsets = binned_bitsets_from_file(open(exclude_fname))
    for line in open(region_fname):
        fields = line.split()
        chr, start, end = fields[0], 0, int(fields[1])
        if chr not in exclude_bitsets:
            do_windows(chr, start, end, window_size)
        else:
            bits = exclude_bitsets[chr]
            assert end < bits.size
            e = 0
            while True:
                s = bits.next_clear(e)
                if s > end:
                    break
                e = bits.next_set(s)
                do_windows(chr, s, min(e, end), window_size)


def do_windows(chr, start, end, window_size):
    length = end - start
    window_count = length // window_size
    if window_count == 0:
        return
    lost = length % window_size
    skip_amount = lost // window_count
    s = 0
    for i in range(0, window_count):
        s += skip_amount
        print(chr, start + s, start + s + window_size)
        s += window_size


if __name__ == "__main__":
    main()