1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
|
#!/usr/bin/python3
"""
Application to convert MAF file to AXT file, projecting to any two species.
Reads a MAF file from standard input and writes an AXT file to standard out;
some statistics are written to standard error. The user must specify the
two species of interest.
usage: %prog primary_species secondary_species < maf_file > axt_file
"""
__author__ = "Bob Harris (rsharris@bx.psu.edu)"
import copy
import sys
import bx.align.axt
import bx.align.maf
def usage(s=None):
message = """
maf_to_axt primary_species secondary_species < maf_file > axt_file
"""
if s is None:
sys.exit(message)
else:
sys.exit(f"{s}\n{message}")
def main():
primary = None
secondary = None
args = sys.argv[1:]
while len(args) > 0:
arg = args.pop(0)
val = None
fields = arg.split("=", 1)
if len(fields) == 2:
arg = fields[0]
val = fields[1]
if val == "":
usage(f"missing a value in {arg}=")
if primary is None and val is None:
primary = arg
elif secondary is None and val is None:
secondary = arg
else:
usage(f"unknown argument: {arg}")
if primary is None:
usage("missing primary species")
if secondary is None:
usage("missing secondary species")
# read the alignments and other info
out = bx.align.axt.Writer(sys.stdout)
axtsRead = 0
mafsWritten = 0
for mafBlock in bx.align.maf.Reader(sys.stdin):
axtsRead += 1
p = mafBlock.get_component_by_src_start(primary)
if p is None:
continue
s = mafBlock.get_component_by_src_start(secondary)
if s is None:
continue
axtBlock = bx.align.Alignment(mafBlock.score, mafBlock.attributes)
axtBlock.add_component(clone_component(p))
axtBlock.add_component(clone_component(s))
remove_mutual_gaps(axtBlock)
if axtBlock.text_size == 0:
continue
out.write(axtBlock)
mafsWritten += 1
sys.stderr.write("%d blocks read, %d written\n" % (axtsRead, mafsWritten))
def clone_component(c):
return bx.align.Component(c.src, c.start, c.size, c.strand, c.src_size, copy.copy(c.text))
def remove_mutual_gaps(block):
if len(block.components) == 0:
return
nonGaps = []
for c in block.components:
for ix in range(0, block.text_size):
if ix not in nonGaps and c.text[ix] != "-":
nonGaps.append(ix)
nonGaps.sort()
for c in block.components:
c.text = "".join([c.text[ix] for ix in nonGaps])
block.text_size = len(nonGaps)
if __name__ == "__main__":
main()
|