File: contig_name_filter.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (29 lines) | stat: -rwxr-xr-x 913 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/usr/bin/python3

import sys
from Bio import SeqIO

# Filter the records of a FASTA file by a list of record names.
#
# Command line: <contigs_file> <file with names> <output> [<operation mode>]
#   "retain" (default) -- write only the records whose name is listed;
#   "remove"           -- write only the records whose name is NOT listed.
# The names file is read one name per line; surrounding whitespace is stripped.
if len(sys.argv) < 4:
    print("Usage:", sys.argv[0], "<contigs_file> <file with names> <output> [<operation mode>]")
    print("Operation mode is \"retain\" (default) or \"remove\"")
    sys.exit(1)

f_n = sys.argv[1]

# Validate the mode before touching any file, so a bad invocation neither
# reads the inputs nor truncates the output file.
mode = sys.argv[4] if len(sys.argv) == 5 else "retain"
if mode not in ("retain", "remove"):
    print("Wrong operation mode")
    sys.exit(1)

# Iterate the file object directly; there is no need to materialise all
# lines with readlines() just to build the set.
with open(sys.argv[2], "r") as names_f:
    names = set(l.strip() for l in names_f)

# True -> keep listed records ("retain"); False -> keep unlisted ("remove").
keep_listed = (mode == "retain")

# The input is opened before the output (as before), so a missing contigs
# file fails without creating/truncating the output. Both handles are
# closed even if parsing or writing raises.
with open(f_n, "r") as input_handle, open(sys.argv[3], "w") as output_handle:
    input_seq_iterator = SeqIO.parse(input_handle, "fasta")
    filtered_iterator = (record for record in input_seq_iterator
                         if (record.name in names) == keep_listed)
    SeqIO.write(filtered_iterator, output_handle, "fasta")