File: bed_intersect.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (68 lines) | stat: -rwxr-xr-x 1,956 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python3

"""
Find regions of first bed file that overlap regions in a second bed file. The
output preserves all fields from the input.

NOTE: -u and -d options are currently not functional!

usage: %prog bed_file_1 bed_file_2
    -m, --mincols=N: Require this much overlap (default 1bp)
    -u, --upstream_pad=N: upstream interval padding (default 0bp)
    -d, --downstream_pad=N: downstream interval padding (default 0bp)
    -v, --reverse: Print regions that DO NOT overlap
    -b, --booleans: Just print '1' if interval overlaps or '0' otherwise
"""

from warnings import warn

from bx.bitset_builders import binned_bitsets_from_file
from bx.cookbook import doc_optparse

mincols = 1
upstream_pad = 0
downstream_pad = 0

options, args = doc_optparse.parse(__doc__)
try:
    if options.mincols:
        mincols = int(options.mincols)
    if options.upstream_pad:
        upstream_pad = int(options.upstream_pad)
    if options.downstream_pad:
        downstream_pad = int(options.downstream_pad)
    reverse = bool(options.reverse)
    booleans = bool(options.booleans)
    in_fname, in2_fname = args
except Exception:
    doc_optparse.exit()

# Read first bed into some bitsets

bitsets = binned_bitsets_from_file(open(in2_fname))

# Read second BED and intersect

for line in open(in_fname):
    if line.startswith("#") or line.isspace():
        continue
    fields = line.split()
    start, end = int(fields[1]), int(fields[2])
    if start > end:
        warn("Bed interval start after end!")
    if fields[0] in bitsets and bitsets[fields[0]].count_range(start, end - start) >= mincols:
        if booleans:
            if reverse:
                print(0)
            else:
                print(1)
        elif not reverse:
            print(line, end=" ")
    else:
        if booleans:
            if reverse:
                print(1)
            else:
                print(0)
        elif reverse:
            print(line, end=" ")