File: NeighborSearch.py

package info (click to toggle)
python-biopython 1.68%2Bdfsg-3~bpo8%2B1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-backports
  • size: 46,856 kB
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (146 lines) | stat: -rw-r--r-- 4,703 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Fast atom neighbor lookup using a KD tree (implemented in C++)."""

from __future__ import print_function

import numpy

from Bio.KDTree import KDTree

from Bio.PDB.PDBExceptions import PDBException
from Bio.PDB.Selection import unfold_entities, entity_levels, uniqueify


class NeighborSearch(object):
    """Class for neighbor searching,

    This class can be used for two related purposes:

     1. To find all atoms/residues/chains/models/structures within radius
        of a given query position.
     2. To find all atoms/residues/chains/models/structures that are within
        a fixed radius of each other.

    NeighborSearch makes use of the Bio.KDTree C++ module, so it's fast.
    """
    def __init__(self, atom_list, bucket_size=10):
        """Create the object.

        Arguments:

         - atom_list - list of atoms. This list is used in the queries.
           It can contain atoms from different structures.
         - bucket_size - bucket size of KD tree. You can play around
           with this to optimize speed if you feel like it.
        """
        self.atom_list = atom_list
        # get the coordinates
        coord_list = [a.get_coord() for a in atom_list]
        # to Nx3 array of type float
        self.coords = numpy.array(coord_list).astype("f")
        assert(bucket_size > 1)
        assert(self.coords.shape[1] == 3)
        self.kdt = KDTree(3, bucket_size)
        self.kdt.set_coords(self.coords)

    # Private

    def _get_unique_parent_pairs(self, pair_list):
        # translate a list of (entity, entity) tuples to
        # a list of (parent entity, parent entity) tuples,
        # thereby removing duplicate (parent entity, parent entity)
        # pairs.
        # o pair_list - a list of (entity, entity) tuples
        parent_pair_list = []
        for (e1, e2) in pair_list:
            p1 = e1.get_parent()
            p2 = e2.get_parent()
            if p1 == p2:
                continue
            elif p1 < p2:
                parent_pair_list.append((p1, p2))
            else:
                parent_pair_list.append((p2, p1))
        return uniqueify(parent_pair_list)

    # Public

    def search(self, center, radius, level="A"):
        """Neighbor search.

        Return all atoms/residues/chains/models/structures
        that have at least one atom within radius of center.
        What entity level is returned (e.g. atoms or residues)
        is determined by level (A=atoms, R=residues, C=chains,
        M=models, S=structures).

        Arguments:

         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)
        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        self.kdt.search(center, radius)
        indices = self.kdt.get_indices()
        n_atom_list = []
        atom_list = self.atom_list
        for i in indices:
            a = atom_list[i]
            n_atom_list.append(a)
        if level == "A":
            return n_atom_list
        else:
            return unfold_entities(n_atom_list, level)

    def search_all(self, radius, level="A"):
        """All neighbor search.

        Search all entities that have atoms pairs within
        radius.

        Arguments:

         - radius - float
         - level - char (A, R, C, M, S)
        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        self.kdt.all_search(radius)
        indices = self.kdt.all_get_indices()
        atom_list = self.atom_list
        atom_pair_list = []
        for i1, i2 in indices:
            a1 = atom_list[i1]
            a2 = atom_list[i2]
            atom_pair_list.append((a1, a2))
        if level == "A":
            # return atoms
            return atom_pair_list
        next_level_pair_list = atom_pair_list
        for l in ["R", "C", "M", "S"]:
            next_level_pair_list = self._get_unique_parent_pairs(next_level_pair_list)
            if level == l:
                return next_level_pair_list

if __name__ == "__main__":

    from numpy.random import random

    class Atom(object):
        def __init__(self):
            self.coord = (100 * random(3))

        def get_coord(self):
            return self.coord

    for i in range(0, 20):
        # Make a list of 100 atoms
        al = [Atom() for j in range(100)]
        ns = NeighborSearch(al)
        print("Found %i" % len(ns.search_all(5.0)))