File: bed_count_by_interval.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (35 lines) | stat: -rwxr-xr-x 807 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/python3
"""
For each interval in `bed1` count the number of intersecting regions in `bed2`.

usage: %prog bed1 bed2
"""

import sys

from bx.intervals import (
    Intersecter,
    Interval,
)

bed1, bed2 = sys.argv[1:3]

ranges = {}
for line in open(bed2):
    fields = line.strip().split()
    chrom = fields[0]
    start = int(fields[1])
    end = int(fields[2])
    if chrom not in ranges:
        ranges[chrom] = Intersecter()
    ranges[chrom].add_interval(Interval(start, end))

for line in open(bed1):
    fields = line.strip().split()
    chrom, start, end = fields[0], int(fields[1]), int(fields[2])
    other = " ".join(fields[3:])
    out = " ".join(fields[:3] + [other])
    if chrom in ranges:
        print(out, len(ranges[chrom].find(start, end)))
    else:
        print(out, 0)