File: maf_chop.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (61 lines) | stat: -rwxr-xr-x 1,494 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/python3

"""
Chops alignments in a MAF file into pieces of a specified length. A random set of
non-overlapping chunks of exactly the specified chop length will be produced.

usage: %prog [options] < maf > maf
  -l, --length: Chop to exactly this length in columns (default 100)
"""

import random
import sys
from optparse import OptionParser

import bx.align.maf


def main():
    """Read MAF alignments from stdin and write fixed-length chunks to stdout.

    The chunk length is taken from the ``-l``/``--length`` option (default
    100). Every alignment read from standard input is handed to ``chop`` and
    each chunk it returns is written to standard output.

    A non-positive ``--length`` is rejected through ``parser.error``, which
    reports the problem as a usage error and terminates the program.
    """
    parser = OptionParser()
    parser.add_option(
        "-l",
        "--length",
        action="store",
        type="int",
        default=100,
        help="Chop to exactly this length in columns (default 100)",
    )

    # Positional arguments are not used; input and output are stdin/stdout.
    options, _args = parser.parse_args()

    length = options.length
    # A length of 0 makes chop() divide by zero, and a negative length makes
    # it silently return no chunks, so refuse both before reading any input.
    if length <= 0:
        parser.error("--length must be a positive integer")

    maf_reader = bx.align.maf.Reader(sys.stdin)
    maf_writer = bx.align.maf.Writer(sys.stdout)

    for m in maf_reader:
        for chopped in chop(m, length):
            maf_writer.write(chopped)


def chop(m, length):
    """Cut alignment *m* into randomly placed, non-overlapping chunks.

    ``m.text_size // length`` chunks of exactly ``length`` columns are taken
    from left to right. The ``m.text_size % length`` leftover columns are
    scattered at random over the gaps before the first chunk, between chunks
    and after the last chunk, so chunk positions vary between calls.

    Args:
        m: Alignment object providing ``text_size`` and ``slice(start, end)``.
        length: Width of every chunk, in alignment columns.

    Returns:
        A list with the slices of ``m`` that ``check_len`` accepts, in
        left-to-right order. The list is empty when ``m`` is shorter than
        ``length``.
    """
    chunk_count, leftover = divmod(m.text_size, length)

    # One gap in front of every chunk plus one trailing gap after the last
    # chunk; each leftover column is assigned to a gap chosen at random.
    gaps = [0] * (chunk_count + 1)
    for _ in range(leftover):
        gaps[random.randrange(chunk_count + 1)] += 1

    chunks = []
    start = 0
    # The trailing gap is never consumed: it only absorbs leftover columns.
    for gap in gaps[:chunk_count]:
        start += gap
        piece = m.slice(start, start + length)
        # Keep the slice that was just checked instead of slicing again.
        if check_len(piece):
            chunks.append(piece)
        start += length
    return chunks


def check_len(a):
    """Return True unless some component of alignment *a* has a size of 0."""
    has_empty_component = any(comp.size == 0 for comp in a.components)
    return not has_empty_component


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()