1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
|
#!/usr/bin/python3
"""
Find continuous regions that are covered by the first bed file (`bed_file_1`)
but not by the second bed file (`bed_file_2`)
usage: %prog bed_file_1 bed_file_2
"""
from bx.bitset_builders import binned_bitsets_from_file
from bx.cookbook import doc_optparse
def print_bits_as_bed(bits):
end = 0
while True:
start = bits.next_set(end)
if start == bits.size:
break
end = bits.next_clear(start)
print("%s\t%d\t%d" % (chrom, start, end))
options, args = doc_optparse.parse(__doc__)
try:
in_fname, in2_fname = args
except ValueError:
doc_optparse.exit()
# Read first bed into some bitsets
bitsets1 = binned_bitsets_from_file(open(in_fname))
bitsets2 = binned_bitsets_from_file(open(in2_fname))
for chrom in bitsets1:
if chrom not in bitsets1:
continue
bits1 = bitsets1[chrom]
if chrom in bitsets2:
bits2 = bitsets2[chrom]
bits2.invert()
bits1.iand(bits2)
print_bits_as_bed(bits1)
|