#!/usr/bin/env python
# Copyright 2000 by Thomas Sicheritz-Ponten.
# Copyright 2016 by Markus Piotrowski.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
# Created: Wed Jun 21 15:53:22 2000
# thomas@cbs.dtu.dk, http://www.cbs.dtu.dk/thomas
"""Translation code for graphical Xbbtools tool."""
import time
from Bio.Seq import reverse_complement
from Bio.Seq import Seq
from Bio.Seq import translate
from Bio.SeqUtils import gc_fraction
class xbb_translations:
    """Translate DNA sequences and render the results as aligned plain text.

    The pretty-printers (``frame_nice`` and ``gcframe``) lay the nucleotide
    sequence out in rows of 60 and print each amino acid three columns apart
    so that it lines up with its codon.
    """

    def __init__(self):
        """Initialize the class (no instance state is kept)."""

    @staticmethod
    def _spaced(residues):
        """Return residues separated by two spaces (three columns per residue)."""
        return "  ".join(str(residues))

    def frame1(self, seq, translation_table=1):
        """Translate ``seq`` starting at its first nucleotide.

        ``translation_table`` is forwarded as the ``table`` argument of
        ``Bio.Seq.translate``.
        """
        return translate(seq, table=translation_table)

    def complement(self, seq):
        """Return the complementary strand of ``seq`` as a Seq object."""
        return Seq(seq).complement()

    def reverse(self, seq):
        """Return ``seq`` with its elements in reverse order."""
        return seq[::-1]

    def antiparallel(self, seq):
        """Return the reverse complement of ``seq``."""
        return reverse_complement(seq)

    def frame(self, seq, frame, translation_table=1):
        """Translate ``seq`` in the reading frame given by ``frame``.

        A negative ``frame`` translates the reverse complement.  In both
        directions the first ``abs(frame) - 1`` nucleotides are skipped
        before translating.
        """
        if frame < 0:
            seq = reverse_complement(seq)
        offset = abs(frame) - 1
        return translate(seq[offset:], table=translation_table)

    def header_nice(self, txt, seq):
        """Return a short header for the translation window.

        The header holds the title ``txt``, the current local time, the
        count of each upper-case nucleotide (A, T, G, C), an abbreviated
        lower-case copy of the sequence, its length and its GC percentage.
        """
        length = len(seq)
        # Long sequences are abbreviated to their first and last 10 letters.
        short = f"{seq[:10]} ... {seq[-10:]}" if length > 20 else seq
        date = time.strftime("%y %b %d, %X", time.localtime())
        counts = "".join(f"{nt}:{seq.count(nt.upper()):d} " for nt in "atgc")
        return (
            f"{txt}: {date}, {counts}"
            f"\nSequence: {short.lower()}, {length:d} nt, "
            f"{self.gc(seq):0.2f} %GC\n"
            "\n\n"
        )

    def frame_nice(self, seq, frame, translation_table=1):
        """Return a pretty-printed single frame translation as a string.

        The sequence is printed in rows of 60 nucleotides.  For positive
        frames the protein row is printed above the sequence row; for other
        values it is printed below, with the protein string reversed.
        """
        length = len(seq)
        protein = self.frame(seq, frame, translation_table)
        protein_length = len(protein)
        # Three columns per residue plus tail padding make the protein row
        # exactly as long as the translated part of the sequence.
        protein = self._spaced(protein)
        protein += (((length - (abs(frame) - 1)) % 3) + 2) * " "
        if frame < 0:
            protein = protein[::-1]

        parts = [self.header_nice(f"Frame {frame} translation", seq)]
        for i in range(0, length, 60):
            subseq = seq[i : i + 60]
            protein_row = protein[i : i + 60]
            seq_row = str(subseq).lower() + "%5d %%\n" % int(self.gc(subseq))
            if frame > 0:
                parts.append("%d/%d\n" % (i + 1, i // 3 + 1))
                parts.append(" " * (frame - 1) + protein_row + "\n")
                parts.append(seq_row + "\n")
            else:
                # Residue numbering counts down from the end of the protein.
                residues_before = len(protein[:i].split())
                parts.append("%d/%d\n" % (i + 1, protein_length - residues_before))
                parts.append(seq_row)
                parts.append(protein_row + "\n\n")
        return "".join(parts)

    def gc(self, seq):
        """Calculate GC content in percent (0-100)."""
        return 100 * gc_fraction(seq)

    def gcframe(self, seq, translation_table=1, direction="both"):
        """Return a pretty-printed translation in several frames as a string.

        ``direction`` selects which frames are shown: ``"plus"`` (frames
        1 to 3 above the sequence), ``"minus"`` (frames -1 to -3 below the
        complement) or ``"both"``.  The sequence and its complement are
        printed for every row of 60 nucleotides.
        """
        # always use uppercase nt-sequence !!
        comp = self.complement(seq)
        anti = self.reverse(comp)
        length = len(seq)

        frames = {}
        for offset in range(3):
            frames[offset + 1] = self.frame1(seq[offset:], translation_table)
            frames[-(offset + 1)] = self.reverse(
                self.frame1(anti[offset:], translation_table)
            )

        show_plus = direction in ("plus", "both")
        show_minus = direction in ("minus", "both")

        parts = [self.header_nice("GCFrame", seq)]
        for i in range(0, length, 60):
            subseq = seq[i : i + 60]
            csubseq = comp[i : i + 60]
            p = i // 3

            if show_plus:
                # + frames; the indent equals the frame's nucleotide offset
                parts.append("%d/%d\n" % (i + 1, p + 1))
                parts.append("  " + self._spaced(frames[3][p : p + 20]) + "\n")
                parts.append(" " + self._spaced(frames[2][p : p + 20]) + "\n")
                parts.append(self._spaced(frames[1][p : p + 20]) + "\n")

            # seq (str() keeps the result a plain string when comp is a Seq)
            parts.append(str(subseq).lower() + "%5d %%\n" % int(self.gc(subseq)))
            parts.append(str(csubseq).lower() + "\n")
            if direction == "plus":
                parts.append("\n")

            if show_minus:
                # - frames
                parts.append(self._spaced(frames[-2][p : p + 20]) + " \n")
                parts.append(" " + self._spaced(frames[-1][p : p + 20]) + "\n")
                parts.append("  " + self._spaced(frames[-3][p : p + 20]) + "\n\n")
        return "".join(parts)
if __name__ == "__main__":
    # Demo run: print the multi-frame GC translation of a sample sequence.
    demo_fragments = [
        "ATTCCGGTTGATCCTGCCGGACCCGACCGCTATCGGGGTAGGGATAAGCCATGGGAGTCT",
        "TACACTCCCGGGTAAGGGAGTGTGGCGGACGGCTGAGTAACACGTGGCTAACCTACCCTC",
        "GGGACGGGGATAACCCCGGGAAACTGGGGATAATCCCCGATAGGGAAGGAGTCCTGGAAT",
        "GGTTCCTTCCCTAAAGGGCTATAGGCTATTTCCCGTTTGTAGCCGCCCGAGGATGGGGCT",
        "ACGGCCCATCAGGCTGTCGGTGGGGTAAAGGCCCACCGAACCTATAACGGGTAGGGGCCG",
        "TGGAAGCGGGAGCCTCCAGTTGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCACC",
        "AGGCGCGAAACGTCCCCAATGCGCGAAAGCGTGAGGGCGCTACCCCGAGTGCCTCCGCAA",
        "GGAGGCTTTTCCCCGCTCTAAAAAGGCGGGGGAATAAGCGGGGGGCAAGTCTGGTGTCAG",
        "CCGCCGCGGTAATACCAGCTCCGCGAGTGGTCGGGGTGATTACTGGGCCTAAAGCGCCTG",
        "TAGCCGGCCCACCAAGTCGCCCCTTAAAGTCCCCGGCTCAACCGGGGAACTGGGGGCGAT",
        "ACTGGTGGGCTAGGGGGCGGGAGAGGCGGGGGGTACTCCCGGAGTAGGGGCGAAATCCTT",
        "AGATACCGGGAGGACCACCAGTGGCGGAAGCGCCCCGCTA",
    ]
    demo_sequence = "".join(demo_fragments)
    translator = xbb_translations()
    print("============================================================")
    report = translator.gcframe(demo_sequence)
    print(report)
|