1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
|
# Copyright 2001 by Katharine Lindner. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Martel based parser to read IntelliGenetics formatted files.
This is a huge regular expression for IntelliGenetics, built using
the 'regular expressions on steroids' capabilities of Martel.
http://hiv-web.lanl.gov/ALIGN_97/HIV12SIV-index.html
Notes:
Just so I remember -- the new end of line syntax is:
New regexp syntax - \R
\R means "\n|\r\n?"
[\R] means "[\n\r]"
This helps us have endlines be consistent across platforms.
"""
# standard library
import string
from Bio.Seq import Seq
"""Hold IntelliGenetics data in a straightforward format.
classes:
o Record - All of the information in an IntelliGenetics record.
"""
class Record:
    """Container for the contents of one IntelliGenetics record.

    Keeps the pieces of a record in a layout close to the original file
    so that they are easy to reach when you only want to look at the
    IntelliGenetics data.

    Attributes:
    comments - list of comment entries (empty list on a new record)
    title - title of the record ('' on a new record)
    sequence - Seq object holding the sequence (Seq('') on a new record)
    """

    def __init__(self):
        """Create an empty record: no title, no comments, empty sequence."""
        self.title = ''
        self.comments = []
        self.sequence = Seq('')

    def __str__(self):
        """Return the record as text.

        The result is the 'Title: ...' line, then one line per comment,
        then the sequence data as formatted by out_sequence.
        """
        # Collect the pieces in output order and join them once at the end.
        pieces = ['Title: %s\n' % self.title]
        pieces.extend(['%s\n' % note for note in self.comments])
        pieces.append(out_sequence(self.sequence.data))
        return ''.join(pieces)
def out_sequence(seq, width=80):
    """Format a sequence as fixed-width lines followed by a blank line.

    Arguments:
    seq - the sequence to format; anything that supports len() and slicing.
    width - maximum number of characters per output line (default 80,
    the width that was previously hard-coded).

    Returns a string holding each consecutive slice of at most `width`
    items on its own newline-terminated line, followed by one extra
    newline.  An empty sequence therefore yields just a newline.

    Raises ValueError if width is less than 1.
    """
    # A zero step would make range() fail with an unrelated message and a
    # negative step would silently drop the whole sequence, so reject both.
    if width < 1:
        raise ValueError('width must be a positive integer')
    # Build all lines first and join once instead of growing a string in
    # the loop.
    lines = ['%s\n' % seq[start:start + width]
             for start in range(0, len(seq), width)]
    # Trailing blank line that terminates the sequence block.
    lines.append('\n')
    return ''.join(lines)
|