File: clusters2csv.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (21 lines) | stat: -rwxr-xr-x 595 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/python3
import sys

from Bio import SeqIO

from os import listdir
from os.path import isfile, join

# Convert a directory of per-cluster FASTA files (*.fna) into a two-column CSV
# ("<sequence id>,<cluster>") with one row per sequence record.
#
# Command line: <cluster directory> <output>
if len(sys.argv) < 3:
    # Usage errors belong on stderr so they never mix with redirected stdout.
    print("Usage: %s <cluster directory> <output> " % sys.argv[0],
          file=sys.stderr)
    sys.exit(1)

path = sys.argv[1]

with open(sys.argv[2], "w") as output:
    # listdir() returns entries in arbitrary order; sort them so that repeated
    # runs over the same directory produce byte-identical output.
    for f in sorted(listdir(path)):
        fasta_path = join(path, f)
        # Require the dot so that only a real ".fna" extension matches, not
        # any name that merely happens to end in the letters "fna".
        if not (isfile(fasta_path) and f.endswith(".fna")):
            continue
        # Cluster label: the last "_"-separated token of the part of the file
        # name that precedes the first ".".
        cluster = f.split(".")[0].split("_")[-1]
        # Only the record IDs are needed, so stream the records one at a time
        # instead of holding every sequence of the file in memory. Rows are
        # written in file order; repeated IDs are written as-is.
        for record in SeqIO.parse(fasta_path, "fasta"):
            print("%s,%s" % (record.id, cluster), file=output)