File: xbb_translations.py

package info (click to toggle)
python-biopython 1.64%2Bdfsg-5
links: PTS, VCS
area: main
in suites: jessie, jessie-kfreebsd
size: 44,416 kB
ctags: 12,472
sloc: python: 153,759; xml: 67,286; ansic: 9,003; sql: 1,488; makefile: 144; sh: 59
file content (131 lines) | stat: -rw-r--r-- 4,804 bytes
#!/usr/bin/env python
# Created: Wed Jun 21 15:53:22 2000
# Last changed: Time-stamp: <00/12/02 15:56:27 thomas>
# thomas@cbs.dtu.dk, http://www.cbs.dtu.dk/thomas
# File: xbb_translations.py

from __future__ import print_function

import sys
import time

sys.path.insert(0, '.')

try:
    from Tkinter import * # Python 2
except ImportError:
    from tkinter import * # Python 3


from Bio.Seq import reverse_complement, translate
from Bio.SeqUtils import GC


class xbb_translations:
    def __init__(self):
        ""

    def frame1(self, seq, translation_table=1):
        return translate(seq, table=translation_table)

    def complement(self, seq):
        #TODO - use Seq methods instead of this hack:?
        return reverse_complement(seq)[::-1]

    def reverse(self, seq):
        return seq[::-1]

    def antiparallel(self, seq):
        return reverse_complement(seq)

    def frame(self, seq, frame, translation_table=1):
        if not ((-3 <= frame <= -1) or (1 <= frame <= 3)):
            frame = 1
        if frame != 1:
            raise NotImplementedError
            #TODO - Support the frame argument
            #The old code didn't, but I can guess from
            #the code the expected 1,2,3 for the forward
            #strands and -1,-2,-3 for the reverse.
        return translate(seq, table=translation_table)

    def header_nice(self, txt, seq):
        length = len(seq)
        if length > 20:
            short = '%s ... %s' % (seq[:10], seq[-10:])
        else:
            short = seq

        date = time.strftime('%y %b %d, %X', time.localtime(time.time()))
        res = '%s: %s, ' % (txt, date)

        for nt in ['a', 't', 'g', 'c']:
            res += '%s:%d ' % (nt, seq.count(nt.upper()))

        res += '\nSequence: %s, %d nt, %0.2f %%GC\n' % (short.lower(), length, self.gc(seq))
        res += '\n\n'
        return res

    def frame_nice(self, seq, frame, translation_table=1):
        length = len(seq)
        protein = self.frame(seq, frame, translation_table)
        res = self.header_nice('Plus one frame translation', seq)
        for i in range(0, length, 60):
            subseq = seq[i:i+60]
            p = i/3
            res += '%d/%d\n' % (i+1, i/3+1)
            res += '  '.join(protein[p:p+20]) + '\n'
            # seq
            res += subseq.lower() + '%5d %%\n' % int(self.gc(subseq))

        return res

    def gc(self, seq):
        """Returns a float between 0 and 100."""
        return GC(seq)

    def gcframe(self, seq, translation_table=1):
        # always use uppercase nt-sequence !!
        comp = self.complement(seq)
        anti = self.reverse(comp)
        length = len(seq)
        frames = {}
        for i in range(0, 3):
            frames[i+1] = self.frame1(seq[i:], translation_table)
            frames[-(i+1)] = self.reverse(self.frame1(anti[i:], translation_table))

        res = self.header_nice('GCFrame', seq)

        for i in range(0, length, 60):
            subseq = seq[i:i+60]
            csubseq = comp[i:i+60]
            p = i/3
            # + frames
            res += '%d/%d\n' % (i+1, i/3+1)
            res += '  ' + '  '.join(frames[3][p:p+20]) + '\n'
            res += ' ' + '  '.join(frames[2][p:p+20]) + '\n'
            res += '  '.join(frames[1][p:p+20]) + '\n'
            # seq
            res += subseq.lower() + '%5d %%\n' % int(self.gc(subseq))
            res += csubseq.lower() + '\n'
            # - frames
            res += '  '.join(frames[-2][p:p+20]) + ' \n'
            res += ' ' + '  '.join(frames[-1][p:p+20]) + '\n'
            res += '  ' + '  '.join(frames[-3][p:p+20]) + '\n\n'

        return res

if __name__ == '__main__':
    #s = 'GCCCTTTCTTATTAGTGCTACCGCTAATAGGTAAATATGAAAAACCTTTG'
    s = 'ATTCCGGTTGATCCTGCCGGACCCGACCGCTATCGGGGTAGGGATAAGCCATGGGAGTCTTACACTCCCGGGTAAGGGAGTGTGGCGGACGGCTGAGTAACACGTGGCTAACCTACCCTCGGGACGGGGATAACCCCGGGAAACTGGGGATAATCCCCGATAGGGAAGGAGTCCTGGAATGGTTCCTTCCCTAAAGGGCTATAGGCTATTTCCCGTTTGTAGCCGCCCGAGGATGGGGCTACGGCCCATCAGGCTGTCGGTGGGGTAAAGGCCCACCGAACCTATAACGGGTAGGGGCCGTGGAAGCGGGAGCCTCCAGTTGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCACCAGGCGCGAAACGTCCCCAATGCGCGAAAGCGTGAGGGCGCTACCCCGAGTGCCTCCGCAAGGAGGCTTTTCCCCGCTCTAAAAAGGCGGGGGAATAAGCGGGGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCTCCGCGAGTGGTCGGGGTGATTACTGGGCCTAAAGCGCCTGTAGCCGGCCCACCAAGTCGCCCCTTAAAGTCCCCGGCTCAACCGGGGAACTGGGGGCGATACTGGTGGGCTAGGGGGCGGGAGAGGCGGGGGGTACTCCCGGAGTAGGGGCGAAATCCTTAGATACCGGGAGGACCACCAGTGGCGGAAGCGCCCCGCTA'

    test = xbb_translations()
#    for i in range(0, 4):
#        print(test.frame1(s[i:]))
    #print(s)
    #print(test.complement(s))
    print('============================================================')
    print(test.gcframe(s))

#    for i in Translate.unambiguous_dna_by_id:
#        print(Translate.unambiguous_dna_by_id[i].table.names[0])