File: targeted_mut_fasta_corrected.py

package info (click to toggle)
discosnp 4.4.4-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 3,264 kB
  • sloc: python: 4,986; cpp: 3,717; sh: 3,205; makefile: 12
file content (68 lines) | stat: -rw-r--r-- 1,837 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# 

import sys
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from time import time


  
def index_reference_mut(mut_file):

    index = {}

    filin = open(mut_file, 'r')    
 
    while True:
        line = filin.readline() 
        if not line: break
        if line.startswith("#") : continue
        
        chr = line.split("\t")[0].strip()
        pos = int(line.split("\t")[1])
        ref = line.split("\t")[2].strip()
        alt = line.split("\t")[3].strip()
        if len(ref) == len(alt):
            if chr not in index:
                index[chr] = {}
            index[chr][pos] = {}
            index[chr][pos]["ref"] = ref
            index[chr][pos]["alt"] = alt 

    #print(len(index[chr]))   
    filin.close()
    return index


def targeted_mut(fasta_file, index):
    
    genome = SeqIO.parse(fasta_file, "fasta")
    fasta_file_mut = ("{}_mut".format(fasta_file))
    nb_mut = 0
    sequences_mut = []
  
    for record in genome:
        chr = record.id
        mutable_seq = record.seq.tomutable()
        #print(mutable_seq[0])
        if chr in index: 
            for pos in index[chr]:
                if mutable_seq[pos - 1] == index[chr][pos]["ref"]:
                    mutable_seq[pos - 1] = index[chr][pos]["alt"]
                    nb_mut += 1
       
        record_mut=SeqRecord(mutable_seq, record.id,'','')
        sequences_mut.append(record_mut)
    SeqIO.write(sequences_mut, fasta_file_mut, "fasta")

    print("{} bases have been mutated".format(nb_mut))

def main(fasta_file, mutation_file):
    index = index_reference_mut(mutation_file)
    targeted_mut(fasta_file, index)

if __name__ == "__main__":
    main(sys.argv[1], sys.argv[2])