File: DSSP.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (206 lines) | stat: -rw-r--r-- 5,494 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
from __future__ import generators

import os
import tempfile
from Bio.PDB import *
from PDBExceptions import PDBException
from AbstractPropertyMap import AbstractResiduePropertyMap
import re


__doc__="""
Use the DSSP program to calculate secondary structure and accessibility.
You need to have a working version of DSSP (and a license, free for 
academic use) in order to use this. For DSSP, see U{http://www.cmbi.kun.nl/gv/dssp/}.

The DSSP codes for secondary structure used here are:

    - H        Alpha helix (4-12)
    - B        Isolated beta-bridge residue
    - E        Strand
    - G        3-10 helix
    - I        pi helix
    - T        Turn
    - S        Bend
    - -        None
"""

# Match C in DSSP
_dssp_cys=re.compile('[a-z]')

# Maximal ASA of amino acids
# Values from Sander & Rost, (1994), Proteins, 20:216-226
# Used for relative accessibility
MAX_ACC={}
MAX_ACC["ALA"]=106.0
MAX_ACC["CYS"]=135.0
MAX_ACC["ASP"]=163.0
MAX_ACC["GLU"]=194.0
MAX_ACC["PHE"]=197.0
MAX_ACC["GLY"]=84.0
MAX_ACC["HIS"]=184.0
MAX_ACC["ILE"]=169.0
MAX_ACC["LYS"]=205.0
MAX_ACC["LEU"]=164.0
MAX_ACC["MET"]=188.0
MAX_ACC["ASN"]=157.0
MAX_ACC["PRO"]=136.0
MAX_ACC["GLN"]=198.0
MAX_ACC["ARG"]=248.0
MAX_ACC["SER"]=130.0
MAX_ACC["THR"]=142.0
MAX_ACC["VAL"]=142.0
MAX_ACC["TRP"]=227.0
MAX_ACC["TYR"]=222.0

def dssp_dict_from_pdb_file(in_file, DSSP="dssp"):
    """
    Create a DSSP dictionary from a PDB file.

    Example:
        >>> dssp_dict=dssp_dict_from_pdb_file("1fat.pdb")
        >>> aa, ss, acc=dssp_dict[('A', 1)]

    @param in_file: pdb file
    @type in_file: string

    @param DSSP: DSSP executable (argument to os.system)
    @type DSSP: string

    @return: a dictionary that maps (chainid, resid) to 
        amino acid type, secondary structure code and 
        accessibility.
    @rtype: {}
    """
    out_file=tempfile.mktemp()
    os.system(DSSP+" %s > %s" % (in_file, out_file))
    dict, keys=make_dssp_dict(out_file)
    # This can be dangerous...
    #os.system("rm "+out_file)
    return dict, keys

def make_dssp_dict(filename):
    """
    Return a DSSP dictionary that maps (chainid, resid) to
    aa, ss and accessibility, from a DSSP file.

    @param filename: the DSSP output file
    @type filename: string
    """
    dssp={}
    fp=open(filename, "r")
    start=0
    keys=[]
    for l in fp.readlines():
        sl=l.split()
        if sl[1]=="RESIDUE":
            # start
            start=1
            continue
        if not start:
            continue
        if l[9]==" ":
            # skip -- missing residue
            continue
        resseq=int(l[5:10])
        icode=l[10]
        chainid=l[11]
        aa=l[13]
        ss=l[16]
        if ss==" ":
            ss="-"
        acc=int(l[34:38])
        res_id=(" ", resseq, icode)
        dssp[(chainid, res_id)]=(aa, ss, acc)
        keys.append((chainid, res_id))
    fp.close()
    return dssp, keys


class DSSP(AbstractResiduePropertyMap):
    """
    Run DSSP on a pdb file, and provide a handle to the 
    DSSP secondary structure and accessibility.

    Note that DSSP can only handle one model.

    Example:
        >>> p=PDBParser()
        >>> structure=parser.get_structure("1fat.pdb")
        >>> model=structure[0]
        >>> dssp=DSSP(model, "1fat.pdb")
        >>> # print dssp data for a residue
        >>> secondary_structure, accessibility=dssp[(chain_id, res_id)]
    """
    def __init__(self, model, pdb_file, dssp="dssp"):
        """
        @param model: the first model of the structure
        @type model: L{Model}

        @param pdb_file: a PDB file
        @type pdb_file: string

        @param dssp: the dssp executable (ie. the argument to os.system)
        @type dssp: string
        """
        # create DSSP dictionary
        dssp_dict, dssp_keys=dssp_dict_from_pdb_file(pdb_file, dssp)
        dssp_map={}
        dssp_list=[]
        # Now create a dictionary that maps Residue objects to 
        # secondary structure and accessibility, and a list of 
        # (residue, (secondary structure, accessibility)) tuples
        for key in dssp_keys:
            chain_id, res_id=key
            chain=model[chain_id]
            res=chain[res_id]
            aa, ss, acc=dssp_dict[key]
            res.xtra["SS_DSSP"]=ss
            res.xtra["EXP_DSSP_ASA"]=acc
            # relative accessibility
            resname=res.get_resname()
            rel_acc=acc/MAX_ACC[resname]
            if rel_acc>1.0:
                rel_acc=1.0
            res.xtra["EXP_DSSP_RASA"]=rel_acc
            # Verify if AA in DSSP == AA in Structure
            # Something went wrong if this is not true!
            resname=to_one_letter_code[resname]
            if resname=="C":
                # DSSP renames C in C-bridges to a,b,c,d,...
                # - we rename it back to 'C'
                if _dssp_cys.match(aa):
                    aa='C'
            if not (resname==aa):
                raise PDBException, "Structure/DSSP mismatch at "+str(res) 
            dssp_map[key]=((res, ss, acc, rel_acc))
            dssp_list.append((res, ss, acc, rel_acc))
        AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)


if __name__=="__main__":

    import sys

    p=PDBParser()
    s=p.get_structure('X', sys.argv[1])

    model=s[0]

    d=DSSP(model, sys.argv[1])

    for r in d:
        print r

    print d.keys()

    print len(d)

    print d.has_key(('A', 1))

    print d[('A', 1)]

    print s[0]['A'][1].xtra