File: axt_extract_ranges.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (67 lines) | stat: -rwxr-xr-x 1,837 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/python3

"""
Reads a list of intervals and an axt. Produces a new axt containing the
portions of the original that overlapped the intervals

usage: %prog interval_file refindex [options] < axt_file
   -m, --mincols=10: Minimum length (columns) required for alignment to be output
"""

import sys

import bx.align.axt
from bx import intervals
from bx.cookbook import doc_optparse


def __main__():
    options, args = doc_optparse.parse(__doc__)

    try:
        range_filename = args[0]
        refindex = int(args[1])
        if options.mincols:
            mincols = int(options.mincols)
        else:
            mincols = 10
    except Exception:
        doc_optparse.exit()

    # Load Intervals

    intersecter = intervals.Intersecter()
    for line in open(range_filename):
        fields = line.split()
        intersecter.add_interval(intervals.Interval(int(fields[0]), int(fields[1])))

    # Start axt on stdout

    out = bx.align.axt.Writer(sys.stdout)

    # Iterate over input axt

    for axt in bx.align.axt.Reader(sys.stdin):
        ref_component = axt.components[refindex]
        # Find overlap with reference component
        intersections = sorted(intersecter.find(ref_component.start, ref_component.end))
        # Keep output axt ordered
        # Write each intersecting block
        for interval in intersections:
            start = max(interval.start, ref_component.start)
            end = min(interval.end, ref_component.end)
            sliced = axt.slice_by_component(refindex, start, end)
            good = True
            for c in sliced.components:
                if c.size < 1:
                    good = False
            if good and sliced.text_size > mincols:
                out.write(sliced)

    # Close output axt

    out.close()


if __name__ == "__main__":
    __main__()