File: zero2one.py

package info (click to toggle)
discosnp 1%3A2.6.2-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,656 kB
  • sloc: python: 5,893; sh: 2,966; cpp: 2,692; makefile: 14
file content (39 lines) | stat: -rw-r--r-- 2,359 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import argparse
import fileinput

def zero2one(input_vcf_file_name: str):
    with fileinput.FileInput(input_vcf_file_name, inplace = True) as vcf_file:
        for vcf_line in vcf_file:
            if vcf_line[0] == "#":
                print(vcf_line, end='')
                continue
            # From
            #121_cassette_SSV9_CAG-eGFP-ISce-PASV40      3163     8622   A       C       .       MULTIPLE        Ty=SNP;Rk=8.0508e-05;UL=0;UR=0;CL=0;CR=0;Genome=A;Sd=1;XA=121_cassette_SSV9_CAG-eGFP-ISce-PASV40_17,121_cassette_SSV9_CAG-eGFP-ISce-PASV40_3081,121_cassette_SSV9_CAG-eGFP-ISce-PASV40_79  GT:DP:PL:AD:HQ  ./.:2:.,.,.:2,0:0,0     0/0:291967:8727,869383,5824307:291691,276:71,48
            # To 
            #121_cassette_SSV9_CAG-eGFP-ISce-PASV40      3163     8622   A       C       .       MULTIPLE        Ty=SNP;Rk=8.0508e-05;UL=0;UR=0;CL=0;CR=0;Genome=A;Sd=1;XA=121_cassette_SSV9_CAG-eGFP-ISce-PASV40_18,121_cassette_SSV9_CAG-eGFP-ISce-PASV40_3082,121_cassette_SSV9_CAG-eGFP-ISce-PASV40_80  GT:DP:PL:AD:HQ  ./.:2:.,.,.:2,0:0,0     0/0:291967:8727,869383,5824307:291691,276:71,48
            vcf_line = vcf_line.strip().split()
            new_POS = int(vcf_line[1]) + 1
            ID_REF_ALT_QUAL_FILTER = '\t'.join(vcf_line[2:7])
            INFO = vcf_line[7]
            # zero to one for XA positions
            s_INFO = INFO.split(';')
            if s_INFO[-1].startswith("XA="):
                XA="XA="
                all_XAs = s_INFO[-1].split("=")[-1].split(',')
                for current_XA in all_XAs:
                    pos = int(current_XA.split("_")[-1]) + 1
                    XA += "_".join(current_XA.split("_")[:-1])+"_"+str(pos)+","
                XA = XA[:-1] # remove additional ','
                INFO = ';'.join(s_INFO[:-1]) + ";" + XA
                    
                    
            end_line = '\t'.join(vcf_line[8:])
            new_vcf_line = f"{vcf_line[0]}\t{new_POS}\t{ID_REF_ALT_QUAL_FILTER}\t{INFO}\t{end_line}"
            print(new_vcf_line)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", help="name of input zero-based vcf file (will be overwritten!)", dest='input_vcf_file', type=str, required=True)
    args = parser.parse_args()
    zero2one(args.input_vcf_file)
    print(f"Transformed zero-based {args.input_vcf_file} to one-based")