1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
|
#!/usr/bin/env python
# Copyright 2000 by Thomas Sicheritz-Ponten.
# Copyright 2016 by Markus Piotrowski.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
# Created: Wed Jun 21 15:53:22 2000
# thomas@cbs.dtu.dk, http://www.cbs.dtu.dk/thomas
# File: xbb_translations.py
from __future__ import print_function
import time
from Bio.Seq import Seq, reverse_complement, translate
from Bio.SeqUtils import GC
class xbb_translations(object):
    """Translate nucleotide sequences and render plain-text reports.

    The report methods (``frame_nice`` and ``gcframe``) lay the sequence out
    in rows of 60 nucleotides and print one residue letter per codon, so
    every residue occupies exactly three columns.  The nucleotide counts in
    the report header only count uppercase letters, so callers should pass
    uppercase sequences.
    """

    # Nucleotides printed per report row.
    _ROW_WIDTH = 60
    # Columns occupied by one residue: the letter plus two blanks.
    _CODON_WIDTH = 3

    def __init__(self):
        """Create a translator; the class keeps no per-instance state."""
        pass

    def frame1(self, seq, translation_table=1):
        """Translate ``seq`` starting at its first base.

        ``translation_table`` is handed to ``translate`` as ``table``.
        """
        return translate(seq, table=translation_table)

    def complement(self, seq):
        """Return the (non-reversed) complement of ``seq`` as a ``Seq``."""
        return Seq.complement(Seq(seq))

    def reverse(self, seq):
        """Return ``seq`` with its elements in reverse order."""
        return seq[::-1]

    def antiparallel(self, seq):
        """Return the reverse complement of ``seq``."""
        return reverse_complement(seq)

    def frame(self, seq, frame, translation_table=1):
        """Translate ``seq`` in the given reading frame.

        A negative ``frame`` translates the reverse complement.  The first
        ``abs(frame) - 1`` bases of the (possibly reverse-complemented)
        sequence are skipped before translating, so ``frame`` is expected
        to be one of 1, 2, 3, -1, -2, -3.
        """
        if frame < 0:
            seq = reverse_complement(seq)
        seq = seq[(abs(frame) - 1):]
        return translate(seq, table=translation_table)

    def header_nice(self, txt, seq):
        """Return the report header for ``seq`` titled ``txt``.

        The header holds the title, the local date/time, the count of each
        uppercase nucleotide, a shortened lowercase copy of the sequence
        (first and last 10 bases when it is longer than 20), its length and
        its GC percentage.  It ends with two blank lines.
        """
        length = len(seq)
        if length > 20:
            short = '%s ... %s' % (seq[:10], seq[-10:])
        else:
            short = seq

        date = time.strftime('%y %b %d, %X', time.localtime(time.time()))
        parts = ['%s: %s, ' % (txt, date)]
        for nt in ['a', 't', 'g', 'c']:
            # Label is lowercase, but only uppercase bases are counted.
            parts.append('%s:%d ' % (nt, seq.count(nt.upper())))
        parts.append('\nSequence: %s, %d nt, %0.2f %%GC\n'
                     % (short.lower(), length, self.gc(seq)))
        parts.append('\n\n')
        return ''.join(parts)

    def frame_nice(self, seq, frame, translation_table=1):
        """Return a report of ``seq`` with its translation in one frame.

        Each row shows a ``nucleotide position/residue position`` label,
        60 lowercase nucleotides followed by the integer GC percentage of
        that row, and the residues lined up with their codons.  Residues
        are printed above the sequence for positive frames and below it
        for negative frames.  The result is always a ``str``.
        """
        width = self._ROW_WIDTH
        gap = ' ' * (self._CODON_WIDTH - 1)

        length = len(seq)
        protein = self.frame(seq, frame, translation_table)
        protein_length = len(protein)
        # One letter plus two blanks per codon, so that slicing this string
        # by nucleotide index below yields the residues of that row.
        protein = gap.join(protein)
        # Pad over the last codon and any leftover bases; after the reversal
        # for negative frames this padding becomes the leading indent.
        protein += (((length - (abs(frame) - 1)) % 3) + 2) * ' '
        if frame < 0:
            protein = protein[::-1]

        parts = [self.header_nice('Frame {} translation'.format(frame), seq)]
        for i in range(0, length, width):
            subseq = seq[i:i + width]
            # str() keeps the report a plain string even for Seq input.
            seq_line = (str(subseq).lower() +
                        '%5d %%\n' % int(self.gc(subseq)))
            residues = protein[i:i + width]
            if frame > 0:
                parts.append('%d/%d\n' % (i + 1, i // 3 + 1))
                parts.append(' ' * (frame - 1) + residues + '\n')
                parts.append(seq_line + '\n')
            else:
                # Residue numbers run downwards along the displayed strand:
                # subtract the residues already printed in earlier rows.
                shown = len(protein[:i].split())
                parts.append('%d/%d\n' % (i + 1, protein_length - shown))
                parts.append(seq_line)
                parts.append(residues + '\n\n')
        return ''.join(parts)

    def gc(self, seq):
        """Returns a float between 0 and 100."""
        return GC(seq)

    def gcframe(self, seq, translation_table=1, direction='both'):
        """Return a report of ``seq`` with several frames at once.

        ``direction`` selects which translations are printed around each
        60-nucleotide row: ``'plus'`` (frames 3, 2, 1 above the sequence),
        ``'minus'`` (frames -2, -1, -3 below the complement) or ``'both'``.
        The sequence, its GC percentage and its complement are printed for
        every row.  The result is always a ``str``.
        """
        # always use uppercase nt-sequence !!
        width = self._ROW_WIDTH
        per_row = width // self._CODON_WIDTH
        gap = ' ' * (self._CODON_WIDTH - 1)

        comp = self.complement(seq)
        anti = self.reverse(comp)
        length = len(seq)
        frames = {}
        for i in range(0, 3):
            frames[i + 1] = self.frame1(seq[i:], translation_table)
            # Reversed so that the residues read left to right along seq.
            frames[-(i + 1)] = self.reverse(
                self.frame1(anti[i:], translation_table))

        show_plus = direction in ('plus', 'both')
        show_minus = direction in ('minus', 'both')

        parts = [self.header_nice('GCFrame', seq)]
        for i in range(0, length, width):
            subseq = seq[i:i + width]
            csubseq = comp[i:i + width]
            p = i // 3
            if show_plus:
                # + frames: frame k starts k - 1 bases into the row.
                parts.append('%d/%d\n' % (i + 1, p + 1))
                for number in (3, 2, 1):
                    parts.append(' ' * (number - 1) +
                                 gap.join(frames[number][p:p + per_row]) +
                                 '\n')
            # seq
            parts.append(str(subseq).lower() +
                         '%5d %%\n' % int(self.gc(subseq)))
            parts.append(str(csubseq).lower() + '\n')
            if direction == 'plus':
                parts.append('\n')
            if show_minus:
                # - frames
                # NOTE(review): these fixed indents (0, 1, 2) appear to line
                # up with the codons only when len(seq) % 3 == 1 -- confirm
                # before relying on the layout for other lengths.
                parts.append(gap.join(frames[-2][p:p + per_row]) + ' \n')
                parts.append(' ' + gap.join(frames[-1][p:p + per_row]) +
                             '\n')
                parts.append(' ' * 2 + gap.join(frames[-3][p:p + per_row]) +
                             '\n\n')
        return ''.join(parts)
if __name__ == '__main__':
    # Demo: print the GC/frame report for a 700 nt example sequence.
    demo_sequence = (
        'ATTCCGGTTGATCCTGCCGGACCCGACCGCTATCGGGGTAGGGATAAGCCATGGGAGTCT'
        'TACACTCCCGGGTAAGGGAGTGTGGCGGACGGCTGAGTAACACGTGGCTAACCTACCCTC'
        'GGGACGGGGATAACCCCGGGAAACTGGGGATAATCCCCGATAGGGAAGGAGTCCTGGAAT'
        'GGTTCCTTCCCTAAAGGGCTATAGGCTATTTCCCGTTTGTAGCCGCCCGAGGATGGGGCT'
        'ACGGCCCATCAGGCTGTCGGTGGGGTAAAGGCCCACCGAACCTATAACGGGTAGGGGCCG'
        'TGGAAGCGGGAGCCTCCAGTTGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCACC'
        'AGGCGCGAAACGTCCCCAATGCGCGAAAGCGTGAGGGCGCTACCCCGAGTGCCTCCGCAA'
        'GGAGGCTTTTCCCCGCTCTAAAAAGGCGGGGGAATAAGCGGGGGGCAAGTCTGGTGTCAG'
        'CCGCCGCGGTAATACCAGCTCCGCGAGTGGTCGGGGTGATTACTGGGCCTAAAGCGCCTG'
        'TAGCCGGCCCACCAAGTCGCCCCTTAAAGTCCCCGGCTCAACCGGGGAACTGGGGGCGAT'
        'ACTGGTGGGCTAGGGGGCGGGAGAGGCGGGGGGTACTCCCGGAGTAGGGGCGAAATCCTT'
        'AGATACCGGGAGGACCACCAGTGGCGGAAGCGCCCCGCTA'
    )
    translator = xbb_translations()
    report = translator.gcframe(demo_sequence)
    print('============================================================')
    print(report)
|