1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
|
# Make sure the translation functions work.
# Start simple - unambiguous DNA to unambiguous protein
from __future__ import print_function
from Bio import Seq
from Bio import Alphabet
from Bio.Alphabet import IUPAC
# Transcription round trip: DNA -> RNA with transcribe(), then RNA -> DNA
# with back_transcribe().
short_dna = "ATA"
long_dna = "GAAAATTCATTTTCTTTGGACTTTCTCTGAAATCCGAGTCCTAGGAAAGATGCGTGAGATTCTTCATATT"
long_rna = "GAAAAUUCAUUUUCUUUGGACUUUCUCUGAAAUCCGAGUCCUAGGAAAGAUGCGUGAGAUUCUUCAUAUU"

for template, expected_rna in ((short_dna, "AUA"), (long_dna, long_rna)):
    transcript = Seq.Seq(template, IUPAC.unambiguous_dna).transcribe()
    assert str(transcript) == expected_rna

# Going the other way, the long RNA must give back the long DNA.
recovered = Seq.Seq(long_rna, IUPAC.unambiguous_rna).back_transcribe()
assert str(recovered) == long_dna
# Translate unambiguous DNA up to the first stop codon (to_stop=True).
# Each case is (DNA, positional arguments selecting the codon table,
# expected protein); an empty argument tuple leaves the table at its
# default, i.e. the standard table.
to_stop_cases = (
    # Simple inputs first: the empty sequence and a lone "TAA" are both
    # expected to give an empty protein.
    ("", (), ""),
    ("TAA", (), ""),
    ("GAAAATTCATTTTCTTTGGACTTTCTCTGAAATCCGAGTCCTAGGAAAGATGCGTGAGATTCTTCA",
     (), "ENSFSLDFL"),
    # Non-default tables, selected by number and by string name.
    ("GAA", (15,), "E"),
    ("ATA", ("Vertebrate Mitochondrial",), "M"),
    ("GAAAATTCATTTTCTTTGGACTTTCTCTGAAATCCGAGTCCTAGGAAAGATGCGTGAGATTCTTCATAT",
     ("SGC8",), "ENSFSLDFLWNPSPSNDAWDSSY"),
)
for coding, table_args, expected in to_stop_cases:
    template = Seq.Seq(coding, IUPAC.unambiguous_dna)
    peptide = template.translate(*table_args, to_stop=True)
    assert str(peptide) == expected
# Standard table again, this time printing what was translated.
template = "TCAAAAAGGTGCATCTAGATG"
print("Starting with %s" % template)
coding_dna = Seq.Seq(template, IUPAC.unambiguous_dna)

# Stopping at the first stop codon must yield an IUPACProtein alphabet ...
truncated = coding_dna.translate(to_stop=True)
assert isinstance(truncated.alphabet, IUPAC.IUPACProtein)
print("%i ungapped residues translated" % len(truncated))
# ... whereas the full translation must carry a HasStopCodon alphabet.
complete = coding_dna.translate()
assert isinstance(complete.alphabet, Alphabet.HasStopCodon)
print(str(truncated))
print("%i residues translated, including gaps" % len(complete))
print(str(complete))

# Table 2 has "AGG" as a stop codon, so the same DNA stops earlier.
table2_truncated = coding_dna.translate(table=2, to_stop=True)
print("%i SGC1 has a stop codon" % len(table2_truncated))
print(str(table2_truncated))
table2_complete = coding_dna.translate(table=2)
print("Actually, there are %i stops." % table2_complete.count("*"))
print(str(table2_complete))

# The character used for stop codons can be overridden.
table2_plus = coding_dna.translate(table=2, stop_symbol="+")
print("Yep, %i stops." % table2_plus.count("+"))
print(str(table2_plus))

# Repeat the core checks starting from RNA. The translation code is shared,
# so only the results are compared against the DNA-derived proteins.
coding_rna = Seq.Seq(template.replace("T", "U"), IUPAC.unambiguous_rna)
truncated_from_rna = coding_rna.translate(to_stop=True)
assert truncated.alphabet is truncated_from_rna.alphabet
assert str(truncated) == str(truncated_from_rna)
# NOTE(review): this message and the one at the end of the section look
# swapped relative to the checks they follow (this one follows the
# to_stop=True comparison). They are kept verbatim because they are part of
# the script's printed output - confirm before changing.
print("RNA translation ... works.")
complete_from_rna = coding_rna.translate()
assert len(complete) == len(complete_from_rna)
assert str(complete) == str(complete_from_rna)
print("RNA translation to stop ... works.")
# Forward ambiguity: translate DNA containing ambiguity codes, selecting
# the codon table by name.
print("Forward ambiguous")
# Codons:    RAT GAT TAR AAT YTA
# Intended:   B   D   *   N   L
ambiguous = Seq.Seq("RATGATTARAATYTA", IUPAC.ambiguous_dna)
print(str(ambiguous.translate('Vertebrate Mitochondrial')))
print(str(ambiguous.translate('SGC1', to_stop=True)))
# XXX Going backwards with ambiguity codes is unfinished, so it is not
# exercised here.
|