File: ResidueDepth.py

package info (click to toggle)
python-biopython 1.68%2Bdfsg-3~bpo8%2B1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-backports
  • size: 46,856 kB
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (175 lines) | stat: -rw-r--r-- 5,010 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Calculation of residue depth using command line tool MSMS.

This module uses Michel Sanner's MSMS program for the surface calculation
(specifically commands msms and pdb_to_xyzr). See:
http://mgltools.scripps.edu/packages/MSMS

Residue depth is the average distance of the atoms of a residue from
the solvent accessible surface.

Residue Depth:

    >>> rd = ResidueDepth(model, pdb_file)
    >>> print(rd[(chain_id, res_id)])

Direct MSMS interface, typical use:

    >>> surface = get_surface("1FAT.pdb")

The surface is a Numeric array with all the surface vertices.

Distance to surface:

    >>> dist = min_dist(coord, surface)

where coord is the coord of an atom within the volume bound by
the surface (ie. atom depth).

To calculate the residue depth (average atom depth of the atoms
in a residue):

    >>> rd = residue_depth(residue, surface)

"""

from __future__ import print_function

import os
import tempfile

import numpy

from Bio.PDB import Selection
from Bio.PDB.AbstractPropertyMap import AbstractPropertyMap
from Bio.PDB.Polypeptide import is_aa


def _read_vertex_array(filename):
    """
    Read the vertex list into a Numeric array.
    """
    with open(filename, "r") as fp:
        vertex_list = []
        for l in fp.readlines():
            sl = l.split()
            if not len(sl) == 9:
                # skip header
                continue
            vl = [float(x) for x in sl[0:3]]
            vertex_list.append(vl)
    return numpy.array(vertex_list)


def get_surface(pdb_file, PDB_TO_XYZR="pdb_to_xyzr", MSMS="msms"):
    """
    Return a Numeric array that represents
    the vertex list of the molecular surface.

    PDB_TO_XYZR --- pdb_to_xyzr executable (arg. to os.system)
    MSMS --- msms executable (arg. to os.system)
    """
    # extract xyz and set radii
    xyz_tmp = tempfile.mktemp()
    PDB_TO_XYZR = PDB_TO_XYZR + " %s > %s"
    make_xyz = PDB_TO_XYZR % (pdb_file, xyz_tmp)
    os.system(make_xyz)
    assert os.path.isfile(xyz_tmp), \
        "Failed to generate XYZR file using command:\n%s" % make_xyz
    # make surface
    surface_tmp = tempfile.mktemp()
    MSMS = MSMS + " -probe_radius 1.5 -if %s -of %s > " + tempfile.mktemp()
    make_surface = MSMS % (xyz_tmp, surface_tmp)
    os.system(make_surface)
    surface_file = surface_tmp + ".vert"
    assert os.path.isfile(surface_file), \
        "Failed to generate surface file using command:\n%s" % make_surface
    # read surface vertices from vertex file
    surface = _read_vertex_array(surface_file)
    # clean up tmp files
    # ...this is dangerous
    # os.system("rm "+xyz_tmp)
    # os.system("rm "+surface_tmp+".vert")
    # os.system("rm "+surface_tmp+".face")
    return surface


def min_dist(coord, surface):
    """
    Return minimum distance between coord
    and surface.
    """
    d = surface - coord
    d2 = numpy.sum(d * d, 1)
    return numpy.sqrt(min(d2))


def residue_depth(residue, surface):
    """
    Return average distance to surface for all
    atoms in a residue, ie. the residue depth.
    """
    atom_list = residue.get_unpacked_list()
    length = len(atom_list)
    d = 0
    for atom in atom_list:
        coord = atom.get_coord()
        d = d + min_dist(coord, surface)
    return d / length


def ca_depth(residue, surface):
    if not residue.has_id("CA"):
        return None
    ca = residue["CA"]
    coord = ca.get_coord()
    return min_dist(coord, surface)


class ResidueDepth(AbstractPropertyMap):
    """
    Calculate residue and CA depth for all residues.
    """
    def __init__(self, model, pdb_file):
        depth_dict = {}
        depth_list = []
        depth_keys = []
        # get_residue
        residue_list = Selection.unfold_entities(model, 'R')
        # make surface from PDB file
        surface = get_surface(pdb_file)
        # calculate rdepth for each residue
        for residue in residue_list:
            if not is_aa(residue):
                continue
            rd = residue_depth(residue, surface)
            ca_rd = ca_depth(residue, surface)
            # Get the key
            res_id = residue.get_id()
            chain_id = residue.get_parent().get_id()
            depth_dict[(chain_id, res_id)] = (rd, ca_rd)
            depth_list.append((residue, (rd, ca_rd)))
            depth_keys.append((chain_id, res_id))
            # Update xtra information
            residue.xtra['EXP_RD'] = rd
            residue.xtra['EXP_RD_CA'] = ca_rd
        AbstractPropertyMap.__init__(self, depth_dict, depth_keys, depth_list)


if __name__ == "__main__":

    import sys
    from Bio.PDB import PDBParser

    p = PDBParser()
    s = p.get_structure("X", sys.argv[1])
    model = s[0]

    rd = ResidueDepth(model, sys.argv[1])

    for item in rd:
        print(item)