1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
|
from __future__ import generators
import os
import tempfile
from Bio.PDB import *
from PDBExceptions import PDBException
from AbstractPropertyMap import AbstractResiduePropertyMap
import re
__doc__="""
Use the DSSP program to calculate secondary structure and accessibility.
You need to have a working version of DSSP (and a license, free for
academic use) in order to use this. For DSSP, see U{http://www.cmbi.kun.nl/gv/dssp/}.
The DSSP codes for secondary structure used here are:
- H Alpha helix (4-12)
- B Isolated beta-bridge residue
- E Strand
- G 3-10 helix
- I pi helix
- T Turn
- S Bend
- - None
"""
# Match C in DSSP
_dssp_cys=re.compile('[a-z]')
# Maximal ASA of amino acids
# Values from Sander & Rost, (1994), Proteins, 20:216-226
# Used for relative accessibility
MAX_ACC={}
MAX_ACC["ALA"]=106.0
MAX_ACC["CYS"]=135.0
MAX_ACC["ASP"]=163.0
MAX_ACC["GLU"]=194.0
MAX_ACC["PHE"]=197.0
MAX_ACC["GLY"]=84.0
MAX_ACC["HIS"]=184.0
MAX_ACC["ILE"]=169.0
MAX_ACC["LYS"]=205.0
MAX_ACC["LEU"]=164.0
MAX_ACC["MET"]=188.0
MAX_ACC["ASN"]=157.0
MAX_ACC["PRO"]=136.0
MAX_ACC["GLN"]=198.0
MAX_ACC["ARG"]=248.0
MAX_ACC["SER"]=130.0
MAX_ACC["THR"]=142.0
MAX_ACC["VAL"]=142.0
MAX_ACC["TRP"]=227.0
MAX_ACC["TYR"]=222.0
def dssp_dict_from_pdb_file(in_file, DSSP="dssp"):
"""
Create a DSSP dictionary from a PDB file.
Example:
>>> dssp_dict=dssp_dict_from_pdb_file("1fat.pdb")
>>> aa, ss, acc=dssp_dict[('A', 1)]
@param in_file: pdb file
@type in_file: string
@param DSSP: DSSP executable (argument to os.system)
@type DSSP: string
@return: a dictionary that maps (chainid, resid) to
amino acid type, secondary structure code and
accessibility.
@rtype: {}
"""
out_file=tempfile.mktemp()
os.system(DSSP+" %s > %s" % (in_file, out_file))
dict, keys=make_dssp_dict(out_file)
# This can be dangerous...
#os.system("rm "+out_file)
return dict, keys
def make_dssp_dict(filename):
"""
Return a DSSP dictionary that maps (chainid, resid) to
aa, ss and accessibility, from a DSSP file.
@param filename: the DSSP output file
@type filename: string
"""
dssp={}
fp=open(filename, "r")
start=0
keys=[]
for l in fp.readlines():
sl=l.split()
if sl[1]=="RESIDUE":
# start
start=1
continue
if not start:
continue
if l[9]==" ":
# skip -- missing residue
continue
resseq=int(l[5:10])
icode=l[10]
chainid=l[11]
aa=l[13]
ss=l[16]
if ss==" ":
ss="-"
acc=int(l[34:38])
res_id=(" ", resseq, icode)
dssp[(chainid, res_id)]=(aa, ss, acc)
keys.append((chainid, res_id))
fp.close()
return dssp, keys
class DSSP(AbstractResiduePropertyMap):
"""
Run DSSP on a pdb file, and provide a handle to the
DSSP secondary structure and accessibility.
Note that DSSP can only handle one model.
Example:
>>> p=PDBParser()
>>> structure=parser.get_structure("1fat.pdb")
>>> model=structure[0]
>>> dssp=DSSP(model, "1fat.pdb")
>>> # print dssp data for a residue
>>> secondary_structure, accessibility=dssp[(chain_id, res_id)]
"""
def __init__(self, model, pdb_file, dssp="dssp"):
"""
@param model: the first model of the structure
@type model: L{Model}
@param pdb_file: a PDB file
@type pdb_file: string
@param dssp: the dssp executable (ie. the argument to os.system)
@type dssp: string
"""
# create DSSP dictionary
dssp_dict, dssp_keys=dssp_dict_from_pdb_file(pdb_file, dssp)
dssp_map={}
dssp_list=[]
# Now create a dictionary that maps Residue objects to
# secondary structure and accessibility, and a list of
# (residue, (secondary structure, accessibility)) tuples
for key in dssp_keys:
chain_id, res_id=key
chain=model[chain_id]
res=chain[res_id]
aa, ss, acc=dssp_dict[key]
res.xtra["SS_DSSP"]=ss
res.xtra["EXP_DSSP_ASA"]=acc
# relative accessibility
resname=res.get_resname()
rel_acc=acc/MAX_ACC[resname]
if rel_acc>1.0:
rel_acc=1.0
res.xtra["EXP_DSSP_RASA"]=rel_acc
# Verify if AA in DSSP == AA in Structure
# Something went wrong if this is not true!
resname=to_one_letter_code[resname]
if resname=="C":
# DSSP renames C in C-bridges to a,b,c,d,...
# - we rename it back to 'C'
if _dssp_cys.match(aa):
aa='C'
if not (resname==aa):
raise PDBException, "Structure/DSSP mismatch at "+str(res)
dssp_map[key]=((res, ss, acc, rel_acc))
dssp_list.append((res, ss, acc, rel_acc))
AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
if __name__=="__main__":
import sys
p=PDBParser()
s=p.get_structure('X', sys.argv[1])
model=s[0]
d=DSSP(model, sys.argv[1])
for r in d:
print r
print d.keys()
print len(d)
print d.has_key(('A', 1))
print d[('A', 1)]
print s[0]['A'][1].xtra
|