File: maf_to_axt.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (112 lines) | stat: -rwxr-xr-x 2,735 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/python3
"""
Application to convert MAF file to AXT file, projecting to any two species.
Reads a MAF file from standard input and writes an AXT file to standard out;
some statistics are written to standard error.  The user must specify the
two species of interest.

usage: %prog primary_species secondary_species < maf_file > axt_file
"""

__author__ = "Bob Harris (rsharris@bx.psu.edu)"

import copy
import sys

import bx.align.axt
import bx.align.maf


def usage(s=None):
    message = """
maf_to_axt primary_species secondary_species < maf_file > axt_file
"""
    if s is None:
        sys.exit(message)
    else:
        sys.exit(f"{s}\n{message}")


def main():
    primary = None
    secondary = None

    args = sys.argv[1:]
    while len(args) > 0:
        arg = args.pop(0)
        val = None
        fields = arg.split("=", 1)
        if len(fields) == 2:
            arg = fields[0]
            val = fields[1]
            if val == "":
                usage(f"missing a value in {arg}=")

        if primary is None and val is None:
            primary = arg
        elif secondary is None and val is None:
            secondary = arg
        else:
            usage(f"unknown argument: {arg}")

    if primary is None:
        usage("missing primary species")

    if secondary is None:
        usage("missing secondary species")

    # read the alignments and other info

    out = bx.align.axt.Writer(sys.stdout)

    axtsRead = 0
    mafsWritten = 0
    for mafBlock in bx.align.maf.Reader(sys.stdin):
        axtsRead += 1

        p = mafBlock.get_component_by_src_start(primary)
        if p is None:
            continue
        s = mafBlock.get_component_by_src_start(secondary)
        if s is None:
            continue

        axtBlock = bx.align.Alignment(mafBlock.score, mafBlock.attributes)
        axtBlock.add_component(clone_component(p))
        axtBlock.add_component(clone_component(s))

        remove_mutual_gaps(axtBlock)
        if axtBlock.text_size == 0:
            continue

        out.write(axtBlock)
        mafsWritten += 1

    sys.stderr.write("%d blocks read, %d written\n" % (axtsRead, mafsWritten))


def clone_component(c):
    return bx.align.Component(c.src, c.start, c.size, c.strand, c.src_size, copy.copy(c.text))


def remove_mutual_gaps(block):
    if len(block.components) == 0:
        return

    nonGaps = []

    for c in block.components:
        for ix in range(0, block.text_size):
            if ix not in nonGaps and c.text[ix] != "-":
                nonGaps.append(ix)

    nonGaps.sort()

    for c in block.components:
        c.text = "".join([c.text[ix] for ix in nonGaps])

    block.text_size = len(nonGaps)


if __name__ == "__main__":
    main()