File: mis_parser.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (72 lines) | stat: -rwxr-xr-x 2,243 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/python3 -O

############################################################################
# Copyright (c) 2015 Saint Petersburg State University
# Copyright (c) 2011-2014 Saint Petersburg Academic University
# All Rights Reserved
# See file LICENSE for details.
############################################################################


import sys
import os
import re

def parse_misassembled_contigs(lines):
    """Collect extensively misassembled contigs from a plantagora report.

    Only the section of the report that follows the line starting with
    "Analyzing contigs..." and precedes the line starting with
    "Analyzing coverage..." is examined.  Inside that section every line
    starting with a tab and "CONTIG:" opens a new contig record; a record is
    reported if any of its lines contains the text "Extensive misassembly".

    Args:
        lines: an iterable of report lines with their line terminators
            (an open text file or any iterable of strings).

    Returns:
        A ``(ids, aligns)`` tuple of two lists.  ``ids`` holds the first
        whitespace-separated token following "CONTIG:" for every reported
        contig, ``aligns`` holds the concatenated report lines of those same
        contigs, both in order of appearance.
    """
    # A single shared iterator is required: the second loop has to resume
    # exactly where the prologue-skipping loop stopped.
    lines = iter(lines)

    # Skipping prologue.
    for line in lines:
        if line.startswith("Analyzing contigs..."):
            break

    mis_contigs_ids = []
    mis_contigs_aligns = []

    # Main part of plantagora output.
    cur_contig_id = ""
    cur_contig_lines = []
    was_extensive_mis = False
    for line in lines:
        if line.startswith("Analyzing coverage..."):
            break

        if line.startswith("\tCONTIG:"):
            cur_contig_id = line.split("\tCONTIG:")[1].strip()
            # A new record starts: flush the previous one if it was flagged.
            if was_extensive_mis:
                was_extensive_mis = False
                mis_contigs_aligns.append("".join(cur_contig_lines))
            cur_contig_lines = []
        cur_contig_lines.append(line)

        if "Extensive misassembly" in line and cur_contig_id:
            was_extensive_mis = True
            mis_contigs_ids.append(cur_contig_id.split()[0])
            # Clearing the id makes sure a contig with several misassemblies
            # is listed only once.
            cur_contig_id = ""

    # The last record has no following "CONTIG:" line to trigger the flush.
    if was_extensive_mis:
        mis_contigs_aligns.append("".join(cur_contig_lines))

    return mis_contigs_ids, mis_contigs_aligns


def main(argv=None):
    """Command-line entry point.

    Reads the report named by ``argv[1]`` and writes two files next to it,
    named after the report path without its extension: ``<base>.txt.mis``
    (one misassembled contig ID per line) and ``<base>.aligns`` (the report
    lines of those contigs).

    Args:
        argv: argument list in ``sys.argv`` layout; defaults to ``sys.argv``.

    Raises:
        SystemExit: after printing the usage text when no report is given.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        # NOTE(review): the usage text advertises optional fasta arguments,
        # but nothing below reads argv[2] or argv[3].
        print("Misassembled contigs getter: prints IDs of misassembled contigs or save these contigs in fasta (if input contigs file and output file specified)")
        print("Usage: " + argv[0] + " <plantagota output> [input_contigs.fasta misassembled_contigs.fasta]")
        sys.exit()

    basename, ext = os.path.splitext(argv[1])
    out_IDs_filename = basename + ".txt.mis"
    out_aligns_filename = basename + ".aligns"

    with open(argv[1], "r") as in_file:
        mis_contigs_ids, mis_contigs_aligns = parse_misassembled_contigs(in_file)

    # Printing IDs of misassembled contigs.
    print("IDs of misassembled contigs saved in " + out_IDs_filename)
    with open(out_IDs_filename, "w") as out_IDs_file:
        out_IDs_file.writelines(contig_id + '\n' for contig_id in mis_contigs_ids)

    print("Alignments of misassembled contigs saved in " + out_aligns_filename)
    with open(out_aligns_filename, "w") as out_aligns_file:
        out_aligns_file.writelines(mis_contigs_aligns)


if __name__ == "__main__":
    main()