File: statalign_to_fastas.py

package info (click to toggle)
bali-phy 4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 15,392 kB
  • sloc: cpp: 120,442; xml: 13,966; haskell: 9,975; python: 2,936; yacc: 1,328; perl: 1,169; lex: 912; sh: 343; makefile: 26
file content (41 lines) | stat: -rw-r--r-- 1,237 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/python
from __future__ import print_function

def get_alignments(infile):
    """Yield the alignments recorded in a sampler log file, one per sample.

    Every line of the form ``Sample <n> Alignment: <row>`` contributes
    ``<row>`` to the alignment of sample ``<n>``; all other lines are
    ignored.  Consecutive matching lines that share a sample number are
    grouped into one alignment.  (Presumably the input is StatAlign output,
    going by the script name -- confirm against a real log.)

    Args:
        infile: Path of the log file to read.

    Yields:
        A list of row strings (without the trailing newline) for each run of
        lines sharing a sample number, in file order.  Nothing is yielded
        when the file contains no matching line.
    """
    # Imported locally so the function also works when this file is imported
    # as a module: the script only imports `re` under its __main__ guard.
    import re

    # Raw string: "\d" and "\s" are regex classes, not string escapes.
    row_pattern = re.compile(r"Sample (\d+)\s+Alignment:\s+(.*)$")

    alignment = None
    last_sample = None
    with open(infile, 'r') as infilestream:
        for line in infilestream:
            m = row_pattern.match(line)
            if m is None:
                continue
            current_sample = int(m.group(1))
            # A change of sample number closes the previous alignment
            # (if there was one) and opens a fresh one.
            if last_sample != current_sample:
                if alignment is not None:
                    yield alignment
                alignment = []
                last_sample = current_sample
            alignment.append(m.group(2))

    # Alignments are only emitted when the next one starts, so the final one
    # is flushed here.  Guard against None so that a file without any
    # alignment lines yields nothing instead of a bogus None entry.
    if alignment is not None:
        yield alignment

def print_alignment(alignment):
    """Write every row of *alignment* to standard output, one per line.

    An empty alignment produces no output at all.
    """
    rows = list(alignment)
    # Guard the empty case: a bare print() would emit a stray newline.
    if rows:
        print(*rows, sep="\n")

# Script entry point: print every alignment found in the file named by the
# first command-line argument to stdout, then report the count on stderr.
if __name__ == '__main__':
    # NOTE: `re` is imported here at module scope and is the name that
    # get_alignments() picks up as a global when run as a script.  `codecs`
    # and `os` are not used by the code visible here; they are kept in case
    # other parts of the file rely on them.
    import codecs
    import re
    import sys
    import os

    # Fail with a usage message instead of an IndexError traceback when the
    # input file argument is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} <infile>".format(sys.argv[0]))

    infile = sys.argv[1]

    num = 0
    for alignment in get_alignments(infile):
        num += 1
        print_alignment(alignment)
        # Two explicit newlines plus print's own terminator separate
        # consecutive alignments with blank lines.
        print("\n\n")
    # The summary goes to stderr so it does not mix with the alignment data
    # written to stdout.
    sys.stderr.write("Read {} alignments.\n".format(num))