File: test_Fasta2.py

package info (click to toggle)

python-biopython 1.45-3

links: PTS, VCS
area: main
in suites: lenny
size: 18,192 kB
ctags: 12,310
sloc: python: 83,505; xml: 13,834; ansic: 7,015; cpp: 1,855; sql: 1,144; makefile: 179

file content (74 lines) | stat: -rw-r--r-- 2,421 bytes

# Copyright 1999 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

import os
import string
from types import *
from Bio import File
from Bio import ParserSupport
from Bio import Fasta
from Bio import Alphabet

def title_to_ids(title):
    """Convert a FASTA title line into an (id, name, description) tuple.

    The title is split at single spaces: the first field is the id block
    and everything after it (re-joined with single spaces) is the
    description.  The id block is then split on "|" and is expected to
    look like

        gi|5690369|gb|AF158246.1|AF158246

    so that the fourth field is the id (with version information) and the
    fifth field is the name (the id without version information).

    This is just a quick-n-dirty implementation, and is definitely not meant
    to handle every FASTA title line case.  An id block with fewer than
    five "|"-separated fields raises IndexError.
    """
    # String methods are used rather than string.split / string.join: the
    # module-level functions are deprecated and do not exist in Python 3.

    # first split the id information from the description
    # the first item is the id info block, the rest is the description
    all_info = title.split(" ")
    id_info = all_info[0]
    descr = " ".join(all_info[1:])

    # now extract the ids from the id block
    id_info_items = id_info.split("|")
    seq_id = id_info_items[3]  # the id with version info
    name = id_info_items[4]  # the id without version info

    return seq_id, name, descr

# Test driver: parse every data file twice -- once with the record parser
# (printing the parsed record itself) and once with the sequence parser
# (printing id, name, description and the repr of the sequence) -- first for
# the nucleotide files in 'Nucleic', then for the protein files in 'Amino'.
#
# print is written in the single-argument parenthesised form, which prints
# exactly the same text under Python 2's print statement.


def _parse_file(parser, directory, filename):
    """Open directory/filename, pass the handle to parser.parse, return the result.

    The file is closed before returning, even when parsing raises.
    """
    src_handle = open(os.path.join(directory, filename))
    try:
        # NOTE(review): assumes parse() has finished reading from the handle
        # by the time it returns -- confirm against Bio.Fasta.
        return parser.parse(src_handle)
    finally:
        src_handle.close()


def _print_records(parser, directory, filenames):
    """Print 'testing <name>' followed by the parsed result for each file."""
    for test in filenames:
        print("testing %s" % test)
        print(_parse_file(parser, directory, test))


def _print_sequences(parser, directory, filenames):
    """Print 'testing <name>' followed by the id, name, description and
    repr(seq) of the parsed result for each file."""
    for test in filenames:
        print("testing %s" % test)
        data = _parse_file(parser, directory, test)
        print(data.id)
        print(data.name)
        print(data.description)
        print(repr(data.seq))


tests = ['lupine.nu', 'elderberry.nu', 'phlox.nu', 'centaurea.nu',
         'wisteria.nu', 'sweetpea.nu', 'lavender.nu']
record_parser = Fasta.RecordParser()
sequence_parser = Fasta.SequenceParser(Alphabet.generic_dna, title_to_ids)

_print_records(record_parser, 'Nucleic', tests)
_print_sequences(sequence_parser, 'Nucleic', tests)

tests = ['aster.pro', 'rosemary.pro', 'rose.pro', 'loveliesbleeding.pro']
# The record parser is reused; only the sequence parser needs a new alphabet.
sequence_parser = Fasta.SequenceParser(Alphabet.generic_protein, title_to_ids)

_print_records(record_parser, 'Amino', tests)
_print_sequences(sequence_parser, 'Amino', tests)