File: Des.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (116 lines) | stat: -rwxr-xr-x 3,693 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Copyright 2001 by Gavin E. Crooks.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.


""" Handle the SCOP DEScription file.

The file format is described in the scop
"release notes.":http://scop.berkeley.edu/release-notes-1.55.html 
The latest DES file can be found
"elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/
  
"Release 1.55":http://scop.berkeley.edu/parse/des.cla.scop.txt_1.55 (July 2001)
"""

from types import *

class Record:
    """One node of the SCOP hierarchy, as listed in a DES file.

    Attributes:

    sunid       -- SCOP unique identifier of the node.

    nodetype    -- Kind of node: 'cl' (class), 'cf' (fold),
                   'sf' (superfamily), 'fa' (family), 'dm' (protein),
                   'sp' (species) or 'px' (domain).  Further node types
                   may be added.

    sccs        -- SCOP concise classification string, e.g. b.1.2.1

    name        -- The SCOP ID (sid) for domains (e.g. d1anu1);
                   currently empty for every other node type.

    description -- Free text, e.g. "All beta proteins" or
                   "Fibronectin type III".

    """
    def __init__(self):
        """Create a record in which every field is an empty string."""
        self.sunid = self.nodetype = self.sccs = ''
        self.name = self.description = ''

    def __str__(self):
        """Format the record as one tab-separated, newline-terminated line.

        An empty name is written as "-".
        """
        fields = (
            self.sunid,
            self.nodetype,
            self.sccs,
            self.name or "-",  # placeholder for nodes without a name
            self.description,
        )
        return "\t".join([str(field) for field in fields]) + "\n"

class Iterator:
    """Iterates over a DES file.

    Lines whose first character is '#' are treated as comments and
    skipped; every other line is returned, either raw or run through
    the optional parser.
    """
    def __init__(self, handle, parser=None):
        """Create an object that iterates over a DES file.

        handle -- file-like object; it only needs a readline() method.

        parser -- an optional Parser object to change the results into
                  another form.  If set to None, then the raw contents
                  of the file will be returned.

        Raises TypeError if handle has no readline attribute.
        """
        # Duck-type the handle instead of comparing against concrete
        # types: any object offering readline() is usable here, whatever
        # class it is implemented as.
        if not hasattr(handle, "readline"):
            raise TypeError("I expected a file handle or file-like object")
        self._handle = handle
        self._parser = parser

    def next(self):
        """Retrieve the next DES record.

        Returns None once the handle is exhausted.  Otherwise returns the
        next non-comment line, passed through the parser's parse() method
        when a parser was supplied.
        """
        while 1:
            line = self._handle.readline()
            if not line:
                return None  # readline() returns '' only at end of file
            if line[0] != '#':
                break  # Not a comment line
        if self._parser is not None:
            return self._parser.parse(line)
        return line

    def __iter__(self):
        """Iterate over the remaining records, stopping when next() returns None."""
        return iter(self.next, None)


class Parser:
    """Parses DES records.

    Records consist of 5 tab-delimited fields:
    sunid, node type, sccs, node name, node description.
    """
    #For example ::
    #
    #21953   px      b.1.2.1 d1dan.1 1dan T:,U:91-106
    #48724   cl      b       -       All beta proteins
    #48725   cf      b.1     -       Immunoglobulin-like beta-sandwich
    #49265   sf      b.1.2   -       Fibronectin type III
    #49266   fa      b.1.2.1 -       Fibronectin type III

    def parse(self, entry):
        """Return a Des Record built from one line of a DES file.

        entry -- a single DES line.  Trailing whitespace, including the
                 line terminator, is ignored.

        The sunid field is converted to an int, and a node name of '-'
        is stored as the empty string.  A line whose description field
        is empty (four fields followed by a tab) is accepted and yields
        an empty description.

        Raises SyntaxError if the line does not consist of 5
        tab-separated fields.  A ValueError from int() propagates if
        the sunid field is not an integer.
        """
        stripped = entry.rstrip()  # no trailing whitespace
        columns = stripped.split("\t")  # separate the tab-delineated cols
        if len(columns) == 4 and entry[len(stripped):].startswith("\t"):
            # rstrip() also removed the tab that delimits an empty
            # description; restore the empty fifth field so that such a
            # line (as produced by str() on a Record) can be read back.
            columns.append("")
        if len(columns) != 5:
            raise SyntaxError("I don't understand the format of %s" % stripped)

        rec = Record()
        rec.sunid, rec.nodetype, rec.sccs, rec.name, rec.description = columns
        if rec.name == '-':
            rec.name = ''  # '-' is the file's placeholder for "no name"
        rec.sunid = int(rec.sunid)
        return rec