1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
#!/usr/bin/python3
"""
Find regions of first bed file that overlap regions in a second bed file. The
output preserves all fields from the input.
NOTE: -u and -d options are currently not functional!
usage: %prog bed_file_1 bed_file_2
-m, --mincols=N: Require this much overlap (default 1bp)
-u, --upstream_pad=N: upstream interval padding (default 0bp)
-d, --downstream_pad=N: downstream interval padding (default 0bp)
-v, --reverse: Print regions that DO NOT overlap
-b, --booleans: Just print '1' if interval overlaps or '0' otherwise
"""
from warnings import warn
from bx.bitset_builders import binned_bitsets_from_file
from bx.cookbook import doc_optparse
mincols = 1
upstream_pad = 0
downstream_pad = 0
options, args = doc_optparse.parse(__doc__)
try:
if options.mincols:
mincols = int(options.mincols)
if options.upstream_pad:
upstream_pad = int(options.upstream_pad)
if options.downstream_pad:
downstream_pad = int(options.downstream_pad)
reverse = bool(options.reverse)
booleans = bool(options.booleans)
in_fname, in2_fname = args
except Exception:
doc_optparse.exit()
# Read first bed into some bitsets
bitsets = binned_bitsets_from_file(open(in2_fname))
# Read second BED and intersect
for line in open(in_fname):
if line.startswith("#") or line.isspace():
continue
fields = line.split()
start, end = int(fields[1]), int(fields[2])
if start > end:
warn("Bed interval start after end!")
if fields[0] in bitsets and bitsets[fields[0]].count_range(start, end - start) >= mincols:
if booleans:
if reverse:
print(0)
else:
print(1)
elif not reverse:
print(line, end=" ")
else:
if booleans:
if reverse:
print(1)
else:
print(0)
elif reverse:
print(line, end=" ")
|