File: maf_drop_overlapping.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (54 lines) | stat: -rwxr-xr-x 1,175 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/python3

"""
Remove any blocks from a maf that overlap any of a set of intervals.

usage: %prog interval files... < maf
"""

import sys

import bx.align.maf
from bx import intervals
from bx.cookbook import doc_optparse


def __main__():
    options, args = doc_optparse.parse(__doc__)

    try:
        assert len(args) > 0
    except AssertionError:
        doc_optparse.exit()

    # Load Intervals

    intersector = intervals.Intersecter()

    for f in args:
        for line in open(f):
            if line.startswith("#") or line.isspace():
                continue
            fields = line.split()
            intersector.add_interval(intervals.Interval(int(fields[0]), int(fields[1])))

    # Start MAF on stdout

    out = bx.align.maf.Writer(sys.stdout)

    # Iterate over input MAF

    for maf in bx.align.maf.Reader(sys.stdin):
        # Find overlap with reference component
        intersections = intersector.find(maf.components[0].start, maf.components[0].end)
        # Write only if no overlap
        if len(intersections) == 0:
            out.write(maf)

    # Close output MAF

    out.close()


if __name__ == "__main__":
    __main__()