File: merge.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (38 lines) | stat: -rw-r--r-- 1,462 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""
Merge overlapping regions in a set of genomic intervals.
"""

from bx.intervals.io import BitsetSafeReaderWrapper
from bx.intervals.operations import (
    bits_set_in_range,
    MAX_END,
)


# sorting could make this a less memory intensive operation(?)
def merge(interval, mincols=1):
    """Yield one row per merged region found in ``interval``.

    The reader is wrapped in a ``BitsetSafeReaderWrapper`` and converted to
    per-chromosome bitsets via ``binned_bitsets()``.  Every ``(start, end)``
    pair reported by ``bits_set_in_range`` for a chromosome is emitted as one
    output row.

    Args:
        interval: Interval reader exposing ``chrom_col``, ``start_col`` and
            ``end_col``; it is re-wrapped before use.
        mincols: Not used by this function; kept so the signature stays
            compatible with existing callers.

    Yields:
        The wrapped reader's ``header`` first, if it is truthy.  Then, for
        each region, a new list of strings that is just wide enough to hold
        the chrom/start/end columns; columns other than those three are
        filled with ``"."``.
    """
    # Handle any ValueError, IndexError and OverflowError exceptions that may be thrown when
    # the bitsets are being created by skipping the problem lines
    interval = BitsetSafeReaderWrapper(interval, lens={})
    bitsets = interval.binned_bitsets()
    if interval.header:
        yield interval.header

    # The column layout does not change between chromosomes, so read it once.
    chrom_col = interval.chrom_col
    start_col = interval.start_col
    end_col = interval.end_col
    width = max(chrom_col, start_col, end_col) + 1

    for chrom in bitsets:
        bitset = bitsets[chrom]
        row = ["."] * width
        row[chrom_col] = chrom
        try:
            for start, end in bits_set_in_range(bitset, 0, MAX_END):
                row[start_col] = str(start)
                row[end_col] = str(end)
                # Yield a copy: handing out the same list every time would make
                # all rows collected by a caller alias the last region written.
                yield list(row)
        except IndexError as e:
            try:
                # This will work only if interval is a NiceReaderWrapper
                interval.skipped += 1
                # no reason to stuff an entire bad file into memory
                if interval.skipped < 10:
                    interval.skipped_lines.append((interval.linenum, interval.current_line, str(e)))
            except AttributeError:
                # The reader does not provide the skipped-line bookkeeping
                # attributes; recording the problem is best-effort only.
                pass