File: Hie.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (136 lines) | stat: -rw-r--r-- 3,594 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Copyright 2001 by Gavin E. Crooks.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.


""" Handle the SCOP HIErarchy files, which describe the SCOP hierarchy in
terms of SCOP unique identifiers (sunid).

The file format is described in the scop
"release notes.":http://scop.berkeley.edu/release-notes-1.55.html 
The latest HIE file can be found
"elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/
  
"Release 1.55":http://scop.berkeley.edu/parse/dir.hie.scop.txt_1.55 (July 2001)
"""


from types import *

class Record:
    """One node of the SCOP hierarchy, as stored in a HIE file.

    Attributes:

    sunid      -- SCOP unique identifier of this node

    parent     -- sunid of this node's parent

    children   -- sequence of the sunids of this node's children
    """
    def __init__(self):
        self.sunid = ''
        self.parent = ''
        self.children = []

    def __str__(self):
        """Return the record as one tab-separated, newline-terminated line.

        The three fields are the node's sunid, the parent's sunid and a
        comma-separated list of the children's sunids.
        """
        sunid_field = str(self.sunid)

        # A false parent (empty, or the sunid 0) is written as '0', except
        # for the node whose own sunid is 0, which gets '-'.
        if self.parent:
            parent_field = str(self.parent)
        elif self.sunid != 0:
            parent_field = '0'
        else:
            parent_field = '-'

        # A node without children is written with a '-' placeholder.
        if self.children:
            children_field = ",".join([str(child) for child in self.children])
        else:
            children_field = '-'

        fields = [sunid_field, parent_field, children_field]
        return "\t".join(fields) + "\n"


class Iterator:
    """Iterates over a HIE file, one record (line) at a time.

    Lines whose first character is '#' are treated as comments and skipped.
    """
    def __init__(self, handle, parser=None):
        """Create an object that iterates over a HIE file.

        handle -- file-like object; it must provide a readline() method.

        parser -- an optional Parser object to change the results into
                  another form.  If set to None, then the raw contents
                  of the file will be returned.

        Raises TypeError if handle has no callable readline attribute.
        """
        # Duck-type the handle instead of comparing against concrete types:
        # readline() is the only thing this class ever calls on it, so any
        # object providing it (built-in files, StringIO objects, ...) is
        # acceptable.
        if not callable(getattr(handle, "readline", None)):
            raise TypeError("I expected a file handle or file-like object")
        self._handle = handle
        self._parser = parser

    def next(self):
        """Retrieve the next HIE record.

        Returns the next non-comment line, passed through the parser's
        parse() method when a parser was supplied.  Returns None once the
        handle is exhausted.
        """
        while 1:
            line = self._handle.readline()
            if not line:
                return None     # End of file
            if line[0] != '#':
                break           # Not a comment line
        if self._parser is not None:
            return self._parser.parse(line)
        return line

    def __iter__(self):
        """Return an iterator that calls next() until it returns None."""
        return iter(self.next, None)


class Parser:
    """Parses HIE records.

    Records consist of 3 tab-delimited fields: the node's sunid, the
    parent's sunid, and a comma-separated list of the children's sunids.
    A field containing only '-' is empty.
    """
    #For example ::
    #
    #0       -       46456,48724,51349,53931,56572,56835,56992,57942
    #21953   49268   -
    #49267   49266   49268,49269
    def parse(self, entry):
        """Returns a Hie Record built from one line of a HIE file.

        entry -- a single record line; trailing whitespace is ignored.

        In the returned Record, sunid and parent are ints (or '' when the
        field is '-'), and children is a list of ints (or an empty tuple
        when the field is '-').

        Raises SyntaxError if the line does not have exactly three
        tab-separated fields, and ValueError if a sunid is not an integer.
        """
        entry = entry.rstrip()        # no trailing whitespace
        columns = entry.split('\t')   # separate the tab-delimited cols
        if len(columns) != 3:
            # SyntaxError is kept (rather than ValueError) so that existing
            # callers catching it continue to work.
            raise SyntaxError("I don't understand the format of %s" % entry)

        sunid, parent, children = columns
        rec = Record()

        # '-' marks an empty field; only real identifiers are converted,
        # so an empty sunid no longer ends up in int('').
        if sunid == '-':
            rec.sunid = ''
        else:
            rec.sunid = int(sunid)

        if parent == '-':
            rec.parent = ''
        else:
            rec.parent = int(parent)

        if children == '-':
            rec.children = ()
        else:
            # Build a real list: a lazy map object would always be true and
            # could only be traversed once.
            rec.children = [int(child) for child in children.split(',')]

        return rec