File: discoSnp++_to_csv.py

package info (click to toggle)
discosnp 1:2.6.2-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,656 kB
  • sloc: python: 5,893; sh: 2,966; cpp: 2,692; makefile: 14
file content (74 lines) | stat: -rwxr-xr-x 1,782 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/python3
import sys
def format_header(comment_line):
    """Return the CSV cells for one '|'-separated comment (header) line.

    The cells are, in order:
      * the first four fields, unchanged;
      * the first run of consecutive fields starting with "C" (the
        coverages), each reduced to the text between its first and second
        "_" (e.g. "C1_38" -> "38");
      * every field that follows that run, unchanged.

    Fields located between the fourth field and the first coverage field are
    not emitted.  If the line holds no coverage field at all, nothing after
    the fourth field is emitted.  The trailing newline, if any, is removed.
    """
    fields = comment_line.rstrip("\n").split("|")
    count = len(fields)
    cells = fields[:4]

    # Each header is scanned on its own: the position of the coverage run is
    # not assumed to be the same in the two headers of a record.  The bound
    # is tested before indexing so a header without coverages cannot raise.
    idx = 4
    while idx < count and not fields[idx].startswith("C"):
        idx += 1
    while idx < count and fields[idx].startswith("C"):
        pieces = fields[idx].split("_")
        # A "C..." field without any "_" has no value part; keep it verbatim
        # rather than failing on the whole file.
        cells.append(pieces[1] if len(pieces) > 1 else fields[idx])
        idx += 1

    cells.extend(fields[idx:])
    return cells


def format_record(com1, data1, com2, data2):
    """Return one CSV line (newline-terminated) for a 4-line .fa record.

    com1/data1 are the comment and sequence lines of the first path,
    com2/data2 those of the second path.  Layout of the result:
    header-1 cells, sequence 1, header-2 cells, sequence 2, joined by ",".
    """
    cells = format_header(com1)
    cells.append(data1.rstrip("\n"))
    cells.extend(format_header(com2))
    cells.append(data2.rstrip("\n"))
    return ",".join(cells) + "\n"


def iter_csv_lines(lines):
    """Yield one CSV line per group of four consecutive input lines.

    `lines` is any iterable of text lines (an open text file works).
    A trailing group with fewer than four lines is ignored.
    """
    source = iter(lines)
    while True:
        record = [next(source, "") for _ in range(4)]
        if not all(record):
            return
        yield format_record(*record)


def main(argv):
    """Command-line entry point; returns the process exit status.

    argv[1] must be the path of the .fa file to convert.  The CSV is written
    to standard output.  Returns 1 (after printing the usage text) when the
    argument count is wrong, 0 otherwise.
    """
    if len(argv) != 2:
        sys.stdout.write("Mandatory: python3 discoSnp_to_csv.py prefix_coherent_k_kval_c_cval.fa\n")
        sys.stdout.write("This program formats the .fa to .csv format by puting each couple of .fa sequence (4 lines = 2 comments + 2 nucleotide sequences) into one line, replacing the '|' character by spaces and removing the CX_ formating\n")
        return 1

    # The context manager closes the input even if formatting fails.
    with open(argv[1], "r") as fasta:
        sys.stdout.writelines(iter_csv_lines(fasta))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))