File: K3000_gfa_to_fa.py

package info (click to toggle)
discosnp 1%3A2.6.2-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,656 kB
  • sloc: python: 5,893; sh: 2,966; cpp: 2,692; makefile: 14
file content (20 lines) | stat: -rw-r--r-- 641 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import sys


def gfa_to_fa(gfa_file_name: str) -> None:
    """Print the sequence of every GFA segment ("S") line in FASTA format.

    Every input line that starts with "S" is split on whitespace; its second
    field is printed as the FASTA header (prefixed with ">") and its third
    field as the sequence on the following line. All other lines are
    ignored. Output is written to standard output.

    Args:
        gfa_file_name (str): File name of the input GFA

    Raises:
        OSError: If the input file cannot be opened.
        IndexError: If a line starting with "S" has fewer than three
            whitespace-separated fields.
    """

    with open(gfa_file_name) as gfa_file:
        # Iterate over the file object itself so lines are streamed one at a
        # time instead of being loaded into memory all at once.
        for gfa_line in gfa_file:
            # Example of a segment line:
            # S       13      ctagtgggggacaaccaatcactaattgtgataAgatgtggtgtacacacactggtactggtcaggcaat  AS:37h; SP:0_70;        BP:0_61;        EV:1    FC:i:60 min:113 max:113 mean:113.0      AC:113;
            if not gfa_line.startswith("S"):
                continue
            # Split once; only the first three fields are used, so the
            # trailing tags are left unsplit in the last element.
            fields = gfa_line.split(None, 3)
            print(f">{fields[1]}\n{fields[2]}")


if __name__ == '__main__':
    # The input GFA path is expected as the first command-line argument.
    # Exit with a usage message (written to stderr, non-zero status) rather
    # than an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <input.gfa>")
    gfa_to_fa(sys.argv[1])