# Translator objects built on the codon tables in Bio.Data.CodonTable.
import string
from Bio import Alphabet, Seq
from Bio.Data import CodonTable
class Translator:
    """Translate sequences to and from protein using a single codon table.

    The codon table passed to the constructor must provide the attributes
    read by the methods below: ``nucleotide_alphabet``, ``protein_alphabet``,
    ``forward_table`` (codon -> amino acid) and ``back_table``
    (amino acid -> codon, with the stop codon stored under the key ``None``).

    Sequences passed in must expose ``alphabet`` and ``data`` attributes and
    support ``len()``; results are returned as ``Seq.Seq`` objects.
    """

    def __init__(self, table):
        """Remember the codon table and start with an empty alphabet cache."""
        self.table = table
        # Maps stop_symbol -> stop-codon-aware protein alphabet; filled
        # lazily by translate() so each symbol's alphabet is built once.
        self._encoded = {}

    def translate(self, seq, stop_symbol="*"):
        """Translate a nucleotide sequence, marking stops with stop_symbol.

        Codons are read in frame from position 0; trailing bases that do not
        form a complete codon are ignored.  Any codon missing from the
        forward table is emitted as ``stop_symbol``.

        Returns a ``Seq.Seq`` whose alphabet is the table's protein alphabet
        wrapped in ``Alphabet.HasStopCodon`` for the given stop symbol.

        Raises AssertionError if ``seq.alphabet`` is not the table's
        nucleotide alphabet.
        """
        assert seq.alphabet == self.table.nucleotide_alphabet, \
            "cannot translate from the given alphabet (%s)" % seq.alphabet
        s = seq.data
        table = self.table
        get = table.forward_table.get
        n = len(seq)
        letters = [get(s[i:i + 3], stop_symbol)
                   for i in range(0, n - n % 3, 3)]
        # return with the correct alphabet encoding (cache the encoding)
        try:
            alphabet = self._encoded[stop_symbol]
        except KeyError:
            # The stop symbol must be passed through: the cache is keyed on
            # it, and back_translate() relies on alphabet.stop_symbol to
            # recognise stops in the translated sequence.
            alphabet = Alphabet.HasStopCodon(table.protein_alphabet,
                                             stop_symbol)
            self._encoded[stop_symbol] = alphabet
        return Seq.Seq("".join(letters), alphabet)

    def translate_to_stop(self, seq):
        """Translate a nucleotide sequence up to the first untranslatable codon.

        Translation stops silently at the first codon whose lookup in the
        forward table raises KeyError; that codon and everything after it
        are dropped.  Trailing bases that do not form a complete codon are
        ignored.

        Returns a ``Seq.Seq`` using the table's plain protein alphabet
        (no stop-codon encoding, since no stop symbol is ever emitted).

        Raises AssertionError if ``seq.alphabet`` is not the table's
        nucleotide alphabet.
        """
        assert seq.alphabet == self.table.nucleotide_alphabet, \
            "cannot translate from given alphabet (have %s, need %s)" %\
            (seq.alphabet, self.table.nucleotide_alphabet)
        s = seq.data
        letters = []
        append = letters.append
        forward = self.table.forward_table
        n = len(seq)
        try:
            for i in range(0, n - n % 3, 3):
                append(forward[s[i:i + 3]])
        except KeyError:
            # Stop at the first codon failure; keep what was translated.
            pass
        return Seq.Seq("".join(letters), self.table.protein_alphabet)

    def back_translate(self, seq):
        """Back-translate a protein sequence into nucleotides.

        If the sequence's alphabet is not an ``Alphabet.HasStopCodon``
        instance the work is delegated to ``_back_translate_no_stop``.
        Otherwise each residue equal to the alphabet's ``stop_symbol`` is
        replaced by the codon stored under ``None`` in the back table, and
        every other residue by its own back-table entry.

        Returns a ``Seq.Seq`` using the table's nucleotide alphabet.

        Raises AssertionError if the wrapped alphabet is not the table's
        protein alphabet, and KeyError if a residue has no back-table entry.
        """
        # includes the stop codon
        if not isinstance(seq.alphabet, Alphabet.HasStopCodon):
            return self._back_translate_no_stop(seq)
        assert seq.alphabet.alphabet == self.table.protein_alphabet, \
            "cannot back translate from the given alphabet (%s)" % \
            seq.alphabet.alphabet
        stop_symbol = seq.alphabet.stop_symbol
        back = self.table.back_table
        letters = []
        append = letters.append
        for c in seq.data:
            if c == stop_symbol:
                append(back[None])
            else:
                append(back[c])
        return Seq.Seq("".join(letters), self.table.nucleotide_alphabet)

    def _back_translate_no_stop(self, seq):
        """Back-translate a protein sequence that carries no stop encoding.

        Every residue is looked up directly in the back table, so a stop
        symbol in the data is not treated specially.

        Returns a ``Seq.Seq`` using the table's nucleotide alphabet.

        Raises AssertionError if ``seq.alphabet`` is not the table's protein
        alphabet, and KeyError if a residue has no back-table entry.
        """
        # does not allow a stop codon
        assert seq.alphabet == self.table.protein_alphabet, \
            "cannot back translate from the given alphabet (%s)" % \
            seq.alphabet
        back = self.table.back_table
        letters = [back[c] for c in seq.data]
        return Seq.Seq("".join(letters), self.table.nucleotide_alphabet)
# Translator lookup tables for the unambiguous codon tables.  Each dictionary
# has the same keys as the CodonTable dictionary of the same name, with every
# codon table wrapped in a Translator.  Comprehensions keep the loop variables
# out of the module namespace.
unambiguous_dna_by_name = {
    name: Translator(codon_table)
    for name, codon_table in CodonTable.unambiguous_dna_by_name.items()
}
unambiguous_dna_by_id = {
    table_id: Translator(codon_table)
    for table_id, codon_table in CodonTable.unambiguous_dna_by_id.items()
}
unambiguous_rna_by_name = {
    name: Translator(codon_table)
    for name, codon_table in CodonTable.unambiguous_rna_by_name.items()
}
unambiguous_rna_by_id = {
    table_id: Translator(codon_table)
    for table_id, codon_table in CodonTable.unambiguous_rna_by_id.items()
}
# XXX Ambiguous - can be done the same except for stop codons!
# NOTE(review): the ambiguous tables are wrapped in the plain Translator
# here; the XXX above suggests stop-codon handling may need special care
# for them - confirm before relying on stop handling with these tables.
# Each dictionary has the same keys as the CodonTable dictionary of the same
# name, with every codon table wrapped in a Translator.
ambiguous_dna_by_name = {
    name: Translator(codon_table)
    for name, codon_table in CodonTable.ambiguous_dna_by_name.items()
}
ambiguous_dna_by_id = {
    table_id: Translator(codon_table)
    for table_id, codon_table in CodonTable.ambiguous_dna_by_id.items()
}
ambiguous_rna_by_name = {
    name: Translator(codon_table)
    for name, codon_table in CodonTable.ambiguous_rna_by_name.items()
}
ambiguous_rna_by_id = {
    table_id: Translator(codon_table)
    for table_id, codon_table in CodonTable.ambiguous_rna_by_id.items()
}
|