File: maf_to_concat_fasta.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (71 lines) | stat: -rwxr-xr-x 1,735 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/python3

"""
Read a maf and print the text as a fasta file, concatenating blocks. A
specific subset of species can be chosen.

usage %prog [options] species1,species2,... < maf_file > fasta_file
    --fill="expression": Insert this between blocks
    --wrap=columns: Wrap FASTA to this many columns
"""

import sys
from optparse import OptionParser

from bx.align import maf


def __main__():
    """Concatenate MAF blocks from stdin into one FASTA record per species.

    Species names are taken from the positional command-line arguments; each
    argument may itself be a comma-separated list. For every block read from
    stdin, the text of the matching component is collected for each species,
    or a run of "-" of length ``m.text_size`` when the block has no such
    component. The collected pieces are joined with the ``--fill`` string and
    written to stdout as ``>species`` followed by the (optionally wrapped)
    sequence.

    Options:
        --fill: Python expression evaluated to obtain the string placed
            between consecutive blocks (default: empty string).
        --wrap: line width for the sequence output (default: 50).
        --nowrap: disable wrapping; only consulted when --wrap is not given.
    """
    # Parse command line arguments

    parser = OptionParser()
    parser.add_option("--fill", action="store", default=None, type="string", help="")
    parser.add_option("--wrap", action="store", default=None, type="int", help="")
    parser.add_option("--nowrap", action="store_true", default=False, dest="nowrap", help="")

    (options, args) = parser.parse_args()

    species = []
    for arg in args:
        species.extend(arg.split(","))

    fill = ""
    if options.fill:
        # SECURITY NOTE(review): the --fill value is passed to eval(), so any
        # Python expression given on the command line is executed. This is
        # kept because expression syntax (e.g. '"N" * 100') is the documented
        # interface; do not feed this option from untrusted input. Consider
        # ast.literal_eval if only plain string literals need to be supported.
        fill = eval(options.fill)

    # An explicit --wrap takes precedence over --nowrap.
    wrap = 50
    if options.wrap is not None:
        wrap = options.wrap
    elif options.nowrap:
        wrap = 0

    # create the concatenated sequences

    # One fragment list per *distinct* species. The collection loop below
    # iterates over this dict rather than the raw argument list, so a species
    # named more than once on the command line does not get every block
    # appended to its shared list several times.
    texts = {s: [] for s in species}
    maf_reader = maf.Reader(sys.stdin)
    for m in maf_reader:
        for s, parts in texts.items():
            # NOTE(review): presumably matches components whose source name
            # starts with `s` -- confirm against bx.align.
            c = m.get_component_by_src_start(s)
            if c:
                parts.append(c.text)
            else:
                # Species absent from this block: pad with gaps so that all
                # records receive the same number of characters per block
                # (assumes m.text_size is the block's text length -- confirm).
                parts.append("-" * m.text_size)
    # Emit one record per requested name, in command-line order.
    for s in species:
        print(">" + s)
        print_n(fill.join(texts[s]), wrap)


def print_n(s, n, f=None):
    """Write `s` to `f`, split into lines of at most `n` characters.

    Args:
        s: The string to write.
        n: Maximum line width. A value <= 0 disables wrapping and writes `s`
            as a single line.
        f: Writable text stream. When omitted, the current ``sys.stdout`` is
            used; it is looked up at call time (not at definition time) so
            that a redirected or replaced stdout is honoured.

    Note:
        With n > 0 an empty `s` produces no output at all, whereas with
        n <= 0 it produces one empty line.
    """
    if f is None:
        f = sys.stdout
    if n <= 0:
        print(s, file=f)
        return
    # Slicing clamps at the end of the string, so the last chunk may be short.
    for start in range(0, len(s), n):
        print(s[start : start + n], file=f)


# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    __main__()