File: analyze_sinks.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (121 lines) | stat: -rwxr-xr-x 3,995 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/python3

############################################################################
# Copyright (c) 2015 Saint Petersburg State University
# Copyright (c) 2011-2014 Saint Petersburg Academic University
# All Rights Reserved
# See file LICENSE for details.
############################################################################


import os
import sys 

def parse_name(name):
      pieces = name.strip().split('_')
      return int(pieces[len(pieces) - 1])


def find_length(filename):
      inFileName = filename
      inFile = open(inFileName)
     
      length = 0
      for line in inFile:
	      parse = line.strip().split(' ')
	      if parse[0] == "Total" and parse[1] == "Region":
		      length = int(parse[3])
		      break
      print length
      return length
      
def make_snap(filename, length):
      inFileName = filename
      inFile = open(inFileName)
      nucls = [0]*(length+1)       
      contig_id = 0
      shift = 7
      for line in inFile:
	      parse = line.strip().split(' ')
	      if parse[0] == "CONTIG:":
		      name = parse[1]    
		      contig_id = parse_name(name)
	      if parse[0] == "One":
		      for i in range(int(parse[shift]),int(parse[shift + 1]) + 1):
			       nucls[i] = contig_id
			       if int(parse[shift + 3]) > int(parse[shift + 4]):     
			                  nucls[i] = nucls[i] * (-1)
			                  
	      if parse[0] == "Ambiguous" and parse[1] == "Alignment:":
		      for i in range(int(parse[2]),int(parse[3]) + 1):
				nucls[i] = contig_id
				if int(parse[5]) > int(parse[6]):     
					    nucls[i] = nucls[i] * (-1)
			                   
	      if parse[0] == "Real" and parse[1] == "Alignment":
		      for i in range(int(parse[3]),int(parse[4]) + 1):
			       nucls[i] = contig_id
			       if int(parse[6]) > int(parse[7]):     
			                   nucls[i] = nucls[i] * (-1)                      
	
	      if parse[0] == "Marking" and parse[2] == "ambiguous:":
		      for i in range(int(parse[3]),int(parse[4]) + 1):
			       nucls[i] = contig_id
			       if int(parse[6]) > int(parse[7]):     
			                   nucls[i] = nucls[i] * (-1) 
			                   
      inFile.close()
      nucls[0] = 1   
      contig_id1 = -1
      contig_id2 = -1
      result = []
      for i in range(1,length+1):
	    if (nucls[i] > 0):
	        contig_id1 = contig_id2
	        contig_id2 = nucls[i]
	        
	    if (contig_id1 > 0 and contig_id2 > 0 and not(contig_id1 == contig_id2)):
	       result.append([contig_id1, contig_id2])
	       
      return result
      
def find_first(path_id):
     inFile = open("../../../data/debruijn/ECOLI_SC_LANE_1_woHUMAN/K55/latest/paths.dat", "r")
     now = 0
     for line in inFile:
	  parse = line.strip().split(' ')
	  if (now == 1):
	      print parse[0]
	      inFile.close()
	      return parse[0]
	  if (parse[0] == "PATH" and parse[1] == str(path_id)):
	      now = 1    
     inFile.close()
      
def print_pairs_of_edges(pairs, filename):
     outFile = open("../../../sinks_with_sources.log", "w")
     for i in range(0,len(pairs)):
          inFile = open(filename, "r")
          source = '0'
          for line in inFile:
	      parse = line.strip().split(' ')
	      if (parse[0] == str(pairs[i][1])):
		 source = parse[1] 
          inFile.close()
          inFile = open("../../../sinks.log", "r")
          for line in inFile:
	      parse = line.strip().split(' ')
	      if (parse[0] == str(pairs[i][0])):
	         outFile.write(str(parse[1])+ " Must: " + str(find_first(pairs[i][1])) + " Was found: " +source + "\n")
          inFile.close()
     outFile.close()
     return 

if len(sys.argv) != 3:
        print "Print to sinks_with_sources.log next edges for sinks"
        print "Usage: " + sys.argv[0] + " <plantagora output>  scafolder_output"
        sys.exit()
       
length = find_length(sys.argv[1])       
pairs_of_contigs = make_snap(sys.argv[1], length)
print_pairs_of_edges(pairs_of_contigs, sys.argv[2])