1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
import sys
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from time import time
def index_reference_mut(mut_file):
index = {}
filin = open(mut_file, 'r')
while True:
line = filin.readline()
if not line: break
if line.startswith("#") : continue
chr = line.split("\t")[0].strip()
pos = int(line.split("\t")[1])
ref = line.split("\t")[2].strip()
alt = line.split("\t")[3].strip()
if len(ref) == len(alt):
if chr not in index:
index[chr] = {}
index[chr][pos] = {}
index[chr][pos]["ref"] = ref
index[chr][pos]["alt"] = alt
#print(len(index[chr]))
filin.close()
return index
def targeted_mut(fasta_file, index):
genome = SeqIO.parse(fasta_file, "fasta")
fasta_file_mut = ("{}_mut".format(fasta_file))
nb_mut = 0
sequences_mut = []
for record in genome:
chr = record.id
mutable_seq = record.seq.tomutable()
#print(mutable_seq[0])
if chr in index:
for pos in index[chr]:
if mutable_seq[pos - 1] == index[chr][pos]["ref"]:
mutable_seq[pos - 1] = index[chr][pos]["alt"]
nb_mut += 1
record_mut=SeqRecord(mutable_seq, record.id,'','')
sequences_mut.append(record_mut)
SeqIO.write(sequences_mut, fasta_file_mut, "fasta")
print("{} bases have been mutated".format(nb_mut))
def main(fasta_file, mutation_file):
index = index_reference_mut(mutation_file)
targeted_mut(fasta_file, index)
if __name__ == "__main__":
main(sys.argv[1], sys.argv[2])
|