File: compare_gaps.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (124 lines) | stat: -rwxr-xr-x 4,115 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/python3

############################################################################
# Copyright (c) 2015 Saint Petersburg State University
# Copyright (c) 2011-2014 Saint Petersburg Academic University
# All Rights Reserved
# See file LICENSE for details.
############################################################################


import os
import sys 

def parse_name(name):
      pieces = name.strip().split('_')
      return int(pieces[1])


def find_length(filename):
      inFileName = filename
      inFile = open(inFileName)
     
      length = 0
      for line in inFile:
	      parse = line.strip().split(' ')
	      if parse[0] == "Total" and parse[1] == "Region":
		      length = int(parse[3])
		      break
      print length
      return length
      
def make_snap(filename, length):
      inFileName = filename
      inFile = open(inFileName)
      nucls = [0]*(length+1)       
      contig_id = 0
      shift = 7
      for line in inFile:
	      parse = line.strip().split(' ')
	      if parse[0] == "CONTIG:":
		      name = parse[1]    
		      contig_id = parse_name(name)
	      if parse[0] == "One":
		      print "ID: " + str(contig_id) + " : " + parse[shift]+ " - " +parse[shift + 1]
		      for i in range(int(parse[shift]),int(parse[shift + 1]) + 1):
			       nucls[i] = contig_id
			       if int(parse[shift + 3]) > int(parse[shift + 4]):     
			                  nucls[i] = nucls[i] * (-1)
			                  
	      if parse[0] == "Ambiguous" and parse[1] == "Alignment:":
		      for i in range(int(parse[2]),int(parse[3]) + 1):
				nucls[i] = contig_id
				if int(parse[5]) > int(parse[6]):     
					    nucls[i] = nucls[i] * (-1)
			                   
	      if parse[0] == "Real" and parse[1] == "Alignment":
		      for i in range(int(parse[3]),int(parse[4]) + 1):
			       nucls[i] = contig_id
			       if int(parse[6]) > int(parse[7]):     
			                   nucls[i] = nucls[i] * (-1)                      
	
	      if parse[0] == "Marking" and parse[2] == "ambiguous:":
		      for i in range(int(parse[3]),int(parse[4]) + 1):
			       nucls[i] = contig_id
			       if int(parse[6]) > int(parse[7]):     
			                   nucls[i] = nucls[i] * (-1) 
			                   
      inFile.close()
      nucls[0] = 1  
     
      toprint = ""
      tocount = 0
      contig_id = -1
      total_len = 0
      num = 0
      print "GAPS: "
      for i in range(1,length+1):
	      if nucls[i] == 0 and (not nucls[i - 1] == 0):
		      toprint = str(i)+" - "
		      tocount = i    
		      contig_id = nucls[i - 1]
              if nucls[i] == 0 and (i + 1 > length or (not nucls[i + 1] == 0)):
                      num = num + 1
                      print toprint+str(i)+" " + str(i-tocount+1)     
                      total_len = total_len + i - tocount + 1
      
      print "Total length: " + str(total_len)    
      print "Total number: " + str(num)
      return nucls
  
def find__gaps(nucls1, nucls2, length):  

      as_before =  False
      piece_len = 0
      total_len = 0
      for i in range(1, length + 1):    
               piece_len = piece_len + 1
               if (not nucls1[i] == nucls1[i - 1] or nucls2[i] == 1):
		       if as_before == True:
			  print " End position: "+str(i) 
			  print " Total length: " + str(piece_len)	
		       as_before = False
		       piece_len = 0
               if (nucls2[i] == 0 and (not nucls1[i] == 0) and (not as_before)):
	               print "ID: " + str(nucls1[i]) 
	               print " Start position: " + str(i)
	               as_before = True          
      
      
      

                

if len(sys.argv) != 2:
        print "Prints gaps for given plantagora output"
        print("Usage: " + sys.argv[0] + " <plantagora output>")
        sys.exit()
       
length = find_length(sys.argv[1])       
gaps_before = make_snap(sys.argv[1], length)
#print "*************************************** \nSecond:"
#gaps_after = make_snap(sys.argv[2], length)
#print "####################################### \nCounting gaps ..."
#find__gaps(gaps_before, gaps_after, length)