File: test_Fasta2.py

package info (click to toggle)
python-biopython 1.45-3
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 18,192 kB
  • ctags: 12,310
  • sloc: python: 83,505; xml: 13,834; ansic: 7,015; cpp: 1,855; sql: 1,144; makefile: 179
file content (74 lines) | stat: -rw-r--r-- 2,421 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Copyright 1999 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

import os
import string
from types import *
from Bio import File
from Bio import ParserSupport
from Bio import Fasta
from Bio import Alphabet

def title_to_ids(title):
    """Convert a FASTA title into its id, name, and description.

    This is just a quick-n-dirty implementation, and is definitely not meant
    to handle every FASTA title line case.  The title is expected to start
    with a "|"-separated id block containing at least five fields, followed
    by an optional space-separated description, for example:

        gi|5690369|gb|AF158246.1|AF158246 some description text

    Arguments:
    title -- the title text to split up.

    Returns a tuple (id, name, description) where id is the fourth field of
    the id block (the id with version info), name is the fifth field (the
    id without version info) and description is everything after the first
    space, with any internal spacing preserved.

    Raises IndexError if the id block has fewer than five fields.
    """
    # First split the id information from the description: the first
    # space-separated item is the id info block, the rest is the description.
    # str methods are used instead of the deprecated string-module functions
    # (string.split / string.join), which no longer exist in Python 3.
    fields = title.split(" ")
    id_block = fields[0]
    descr = " ".join(fields[1:])

    # Now extract the ids from the id block, e.g.
    # gi|5690369|gb|AF158246.1|AF158246
    id_items = id_block.split("|")
    seq_id = id_items[3]  # the id with version info
    name = id_items[4]  # the id without version info

    return seq_id, name, descr

# Test driver: parse every sample file twice, once with the record parser
# and once with the sequence parser, and print the results.
#
# NOTE: every print below takes exactly one parenthesised argument, so the
# output is the same whether print is the Python 2 statement or the
# Python 3 function.

def _parse_file(parser, directory, filename):
    """Open directory/filename, parse it with parser, return the result.

    The file handle is closed before returning, even if parsing raises,
    so the driver does not leak one open file per test case.
    """
    handle = open(os.path.join(directory, filename))
    try:
        return parser.parse(handle)
    finally:
        handle.close()


def _print_records(parser, directory, filenames):
    """Parse each file with parser and print the parsed result as-is."""
    for filename in filenames:
        print("testing %s" % filename)
        print(_parse_file(parser, directory, filename))


def _print_sequences(parser, directory, filenames):
    """Parse each file with parser and print id, name, description, seq."""
    for filename in filenames:
        print("testing %s" % filename)
        data = _parse_file(parser, directory, filename)
        print(data.id)
        print(data.name)
        print(data.description)
        print(repr(data.seq))


# Nucleotide files, read from the 'Nucleic' directory.
tests = ['lupine.nu', 'elderberry.nu', 'phlox.nu', 'centaurea.nu',
         'wisteria.nu', 'sweetpea.nu', 'lavender.nu']
record_parser = Fasta.RecordParser()
sequence_parser = Fasta.SequenceParser(Alphabet.generic_dna, title_to_ids)

_print_records(record_parser, 'Nucleic', tests)
_print_sequences(sequence_parser, 'Nucleic', tests)

# Protein files, read from the 'Amino' directory.  The record parser is
# reused; the sequence parser is rebuilt with the protein alphabet.
tests = ['aster.pro', 'rosemary.pro', 'rose.pro', 'loveliesbleeding.pro']
sequence_parser = Fasta.SequenceParser(Alphabet.generic_protein, title_to_ids)

_print_records(record_parser, 'Amino', tests)
_print_sequences(sequence_parser, 'Amino', tests)