File: Cla.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (139 lines) | stat: -rw-r--r-- 3,825 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Copyright 2001 by Gavin E. Crooks.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

""" Handle the SCOP CLAssification file, which describes SCOP domains.

The file format is described in the scop
"release notes.":http://scop.berkeley.edu/release-notes-1.55.html 
The latest CLA file can be found
"elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/
  
"Release 1.55":http://scop.berkeley.edu/parse/dir.cla.scop.txt_1.55 (July 2001)

"""


from types import *

from Residues import * 
from FileIndex import FileIndex


class Record:
    """Holds information for one SCOP domain

    sid         --  SCOP identifier. e.g. d1danl2

    residues    --  The domain definition as a Residues object

    sccs        --  SCOP concise classification strings.  e.g. b.1.2.1

    sunid       --  SCOP unique identifier for this domain

    hierarchy   --  A sequence of tuples (nodetype, sunid) describing the
                    location of this domain in the SCOP hierarchy.
                    See the Scop module for a description of nodetypes.
    """
    def __init__(self):
        self.sid = ''
        self.residues = None 
        self.sccs = ''
        self.sunid =''
        self.hierarchy = []
        
    def __str__(self):
        s = []
        s.append(self.sid)
        s += str(self.residues).split(" ")
        s.append(self.sccs)
        s.append(self.sunid)

        h=[]
        for ht in self.hierarchy:
             h.append("=".join(map(str,ht))) 
        s.append(",".join(h))
       
        return "\t".join(map(str,s)) + "\n"

class Iterator:
    """Iterates over a CLA file.
    """
    def __init__(self, handle, parser=None):
        """Create an object that iterates over a DES file.

        handle -- file-like object.

        parser -- an optional Parser object to chang the results into
                  another form.  If set to None, then the raw contents
                  of the file will be returned.

        """
        if type(handle) is not FileType and type(handle) is not InstanceType:
            raise TypeError, "I expected a file handle or file-like object"
        self._handle = handle
        self._parser = parser

    def next(self):
        """Retrieve the next CLA record."""    
        while 1:
            line = self._handle.readline()
            if not line: return None
            if line[0] !='#':  break  # Not a comment line
        if self._parser is not None :
            return self._parser.parse(line)
        return line
    
    def __iter__(self):
        return iter(self.next, None)


class Parser:
    """Parses tab-deliminated CLA records.
    """
    def parse(self, entry):
        """Returns a Cla Record """        
        entry = entry.rstrip()        # no trailing whitespace
        columns = entry.split('\t')   # separate the tab-delineated cols
        if len(columns) != 6:
            raise SyntaxError, "I don't understand the format of %s" % entry
        
        rec = Record()
        rec.sid, pdbid, residues, rec.sccs, rec.sunid, hierarchy = columns
        rec.residues = Residues(residues)
        rec.residues.pdbid = pdbid
        rec.sunid = int(rec.sunid)
        
        h = []
        for ht in hierarchy.split(",") :
            h.append( ht.split('='))        
        for ht in h:
            ht[1] = int(ht[1])
        rec.hierarchy = h
        

        return rec


class Index(FileIndex):
    """A CLA file indexed by SCOP identifiers."""
    def __init__(self, filename, ) :
        iterator = lambda f : Iterator(f, Parser())
        key_gen  = lambda rec : rec.sid

        FileIndex.__init__(self, filename, iterator, key_gen)