1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
|
#!/usr/bin/env python
# Created: Wed Jun 21 15:53:22 2000
# Last changed: Time-stamp: <00/12/02 15:56:27 thomas>
# thomas@cbs.dtu.dk, http://www.cbs.dtu.dk/thomas
# File: xbb_translations.py
from __future__ import print_function
import sys
import time
sys.path.insert(0, '.')
try:
from Tkinter import * # Python 2
except ImportError:
from tkinter import * # Python 3
from Bio.Seq import reverse_complement, translate
from Bio.SeqUtils import GC
class xbb_translations:
    """Translate nucleotide sequences and build plain-text translation reports.

    The report methods (``frame_nice`` and ``gcframe``) lay the sequence out
    in rows of 60 nucleotides with the matching 20 residues per row.  Residues
    are written three columns apart so that, in a monospaced font, each one
    lines up with its codon.
    """

    def __init__(self):
        """Create a translator; nothing is stored on the instance."""

    @staticmethod
    def _chunk_offsets(length, width=60):
        """Yield ``(nucleotide_start, residue_start)`` for each report row.

        Both values are integers: floor division keeps ``residue_start``
        usable as a slice index on Python 3, where ``/`` returns a float.
        """
        for nt_start in range(0, length, width):
            yield nt_start, nt_start // 3

    @staticmethod
    def _residue_row(protein, start, indent=0):
        """Return up to 20 residues from ``start``, spaced one per codon.

        ``indent`` is the number of leading spaces; the report methods use it
        to stagger the different reading frames against the nucleotide row.
        """
        return ' ' * indent + '  '.join(protein[start:start + 20])

    def frame1(self, seq, translation_table=1):
        """Translate ``seq`` from its first nucleotide with the given table."""
        return translate(seq, table=translation_table)

    def complement(self, seq):
        """Return the complement of ``seq`` in the same orientation."""
        # TODO - use Seq methods instead of this hack:?
        # Reversing the reverse complement restores the original orientation.
        return reverse_complement(seq)[::-1]

    def reverse(self, seq):
        """Return ``seq`` reversed (no complementing)."""
        return seq[::-1]

    def antiparallel(self, seq):
        """Return the reverse complement of ``seq``."""
        return reverse_complement(seq)

    def frame(self, seq, frame, translation_table=1):
        """Translate ``seq`` in the requested reading frame.

        Only frame 1 is implemented.  A ``frame`` outside 1..3 and -3..-1 is
        treated as 1.

        Raises:
            NotImplementedError: for any other in-range frame (2, 3, -1, -2,
                -3).
        """
        if not ((-3 <= frame <= -1) or (1 <= frame <= 3)):
            frame = 1
        if frame != 1:
            # TODO - Support the frame argument.
            # The old code didn't, but the expected values appear to be
            # 1, 2, 3 for the forward strand and -1, -2, -3 for the reverse.
            raise NotImplementedError(
                'only reading frame 1 is supported, got %r' % (frame,))
        return translate(seq, table=translation_table)

    def header_nice(self, txt, seq):
        """Return the report header for ``seq``.

        The header holds ``txt``, the current local time, the counts of
        upper-case A/T/G/C, a shortened lower-case copy of the sequence
        (first and last 10 nt when longer than 20), its length and its GC
        percentage.
        """
        length = len(seq)
        if length > 20:
            short = '%s ... %s' % (seq[:10], seq[-10:])
        else:
            short = seq

        date = time.strftime('%y %b %d, %X', time.localtime())
        res = '%s: %s, ' % (txt, date)
        # Only upper-case letters are counted; callers pass upper-case input.
        res += ''.join('%s:%d ' % (nt, seq.count(nt.upper()))
                       for nt in ('a', 't', 'g', 'c'))
        res += '\nSequence: %s, %d nt, %0.2f %%GC\n' % (
            short.lower(), length, self.gc(seq))
        res += '\n\n'
        return res

    def frame_nice(self, seq, frame, translation_table=1):
        """Return a report of ``seq`` with its translation above each row.

        Each row shows the 1-based nucleotide/residue position, 20 residues,
        then 60 lower-case nucleotides followed by the row's GC percentage.

        Raises:
            NotImplementedError: propagated from ``frame`` for frames other
                than 1.
        """
        protein = self.frame(seq, frame, translation_table)
        res = self.header_nice('Plus one frame translation', seq)
        for nt_start, aa_start in self._chunk_offsets(len(seq)):
            subseq = seq[nt_start:nt_start + 60]
            res += '%d/%d\n' % (nt_start + 1, aa_start + 1)
            res += self._residue_row(protein, aa_start) + '\n'
            # seq
            res += subseq.lower() + '%5d %%\n' % int(self.gc(subseq))
        return res

    def gc(self, seq):
        """Returns a float between 0 and 100."""
        return GC(seq)

    def gcframe(self, seq, translation_table=1):
        """Return a six-frame translation report of ``seq``.

        Each row block shows the three forward frames, the sequence with its
        GC percentage, the complementary strand, and the three reverse frames.
        Reverse-frame translations are reversed so they read along the
        complementary strand as displayed.
        """
        # always use uppercase nt-sequence !!
        comp = self.complement(seq)
        anti = self.reverse(comp)

        frames = {}
        for offset in range(3):
            frames[offset + 1] = self.frame1(seq[offset:], translation_table)
            frames[-(offset + 1)] = self.reverse(
                self.frame1(anti[offset:], translation_table))

        res = self.header_nice('GCFrame', seq)
        for nt_start, aa_start in self._chunk_offsets(len(seq)):
            subseq = seq[nt_start:nt_start + 60]
            csubseq = comp[nt_start:nt_start + 60]
            res += '%d/%d\n' % (nt_start + 1, aa_start + 1)
            # + frames, staggered by their offset into the sequence
            res += self._residue_row(frames[3], aa_start, 2) + '\n'
            res += self._residue_row(frames[2], aa_start, 1) + '\n'
            res += self._residue_row(frames[1], aa_start) + '\n'
            # seq
            res += subseq.lower() + '%5d %%\n' % int(self.gc(subseq))
            res += csubseq.lower() + '\n'
            # - frames
            res += self._residue_row(frames[-2], aa_start) + ' \n'
            res += self._residue_row(frames[-1], aa_start, 1) + '\n'
            res += self._residue_row(frames[-3], aa_start, 2) + '\n\n'
        return res
if __name__ == '__main__':
    # Demo run: print the six-frame GC report for a sample sequence.
    # A shorter sample that can be swapped in:
    # 'GCCCTTTCTTATTAGTGCTACCGCTAATAGGTAAATATGAAAAACCTTTG'
    demo_seq = 'ATTCCGGTTGATCCTGCCGGACCCGACCGCTATCGGGGTAGGGATAAGCCATGGGAGTCTTACACTCCCGGGTAAGGGAGTGTGGCGGACGGCTGAGTAACACGTGGCTAACCTACCCTCGGGACGGGGATAACCCCGGGAAACTGGGGATAATCCCCGATAGGGAAGGAGTCCTGGAATGGTTCCTTCCCTAAAGGGCTATAGGCTATTTCCCGTTTGTAGCCGCCCGAGGATGGGGCTACGGCCCATCAGGCTGTCGGTGGGGTAAAGGCCCACCGAACCTATAACGGGTAGGGGCCGTGGAAGCGGGAGCCTCCAGTTGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCACCAGGCGCGAAACGTCCCCAATGCGCGAAAGCGTGAGGGCGCTACCCCGAGTGCCTCCGCAAGGAGGCTTTTCCCCGCTCTAAAAAGGCGGGGGAATAAGCGGGGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCTCCGCGAGTGGTCGGGGTGATTACTGGGCCTAAAGCGCCTGTAGCCGGCCCACCAAGTCGCCCCTTAAAGTCCCCGGCTCAACCGGGGAACTGGGGGCGATACTGGTGGGCTAGGGGGCGGGAGAGGCGGGGGGTACTCCCGGAGTAGGGGCGAAATCCTTAGATACCGGGAGGACCACCAGTGGCGGAAGCGCCCCGCTA'
    separator = '============================================================'

    translator = xbb_translations()
    # The separator is printed before the report is built, so it still
    # appears if building the report fails.
    print(separator)
    print(translator.gcframe(demo_seq))
|