1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
|
#!/usr/bin/python3
"""
Chops alignments in a MAF file into pieces of a specified length. A random set of
non-overlapping chunks of exactly the specified chop length will be produced.
usage: %prog [options] < maf > maf
-l, --length: Chop to exactly this length in columns (default 100)
"""
import random
import sys
from optparse import OptionParser
import bx.align.maf
def main():
    """Read MAF blocks from stdin, chop each one, and write the chunks to stdout.

    The chunk length in alignment columns is taken from ``-l``/``--length``
    (default 100). Every chunk that ``chop`` returns for an input block is
    written through a ``bx.align.maf.Writer`` wrapped around ``sys.stdout``.
    A non-positive length is rejected with a usage error before any input
    is read.
    """
    # Parse command line arguments
    parser = OptionParser()
    parser.add_option(
        "-l",
        "--length",
        action="store",
        type="int",
        default=100,
        help="Chop to exactly this length in columns (default 100)",
    )
    (options, args) = parser.parse_args()
    length = options.length
    # chop() divides by the chunk length: zero would raise ZeroDivisionError
    # and a negative value would silently produce no chunks at all.
    if length <= 0:
        parser.error("--length must be a positive integer")

    maf_reader = bx.align.maf.Reader(sys.stdin)
    maf_writer = bx.align.maf.Writer(sys.stdout)
    for m in maf_reader:
        for chopped in chop(m, length):
            maf_writer.write(chopped)
def chop(m, length):
    """Cut alignment ``m`` into randomly placed, non-overlapping chunks.

    ``m.text_size // length`` chunks of exactly ``length`` columns are taken
    from left to right. The ``m.text_size % length`` leftover columns are
    scattered at random over the gaps before, between and after the chunks,
    so the chunk positions vary from call to call.

    Args:
        m: Alignment object exposing ``text_size`` and ``slice(start, end)``.
        length: Width of every chunk, in alignment columns.

    Returns:
        A list of slices of ``m``, in left-to-right order. Slices for which
        ``check_len`` returns False are left out.
    """
    maf_length = m.text_size
    chunk_count, lost_bases = divmod(maf_length, length)
    # One gap per chunk plus a trailing one. The trailing gap is never read
    # below; it only absorbs leftover columns that fall after the last chunk.
    skip_amounts = [0] * (chunk_count + 1)
    for _ in range(lost_bases):
        skip_amounts[random.randrange(chunk_count + 1)] += 1
    start = 0
    rval = []
    for skip in skip_amounts[:chunk_count]:
        start += skip
        # Slice once and reuse the result for both the check and the output.
        piece = m.slice(start, start + length)
        if check_len(piece):
            rval.append(piece)
        start += length
    return rval
def check_len(a):
    """Return True when no component of ``a`` has a ``size`` equal to zero."""
    return not any(comp.size == 0 for comp in a.components)
# Run the chopper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|