File: leaflet.py

package info (click to toggle)
mdanalysis 2.10.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 116,696 kB
  • sloc: python: 92,135; ansic: 8,156; makefile: 215; sh: 138
file content (387 lines) | stat: -rw-r--r-- 13,360 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#


"""
Leaflet identification --- :mod:`MDAnalysis.analysis.leaflet`
==============================================================

This module implements the *LeafletFinder* algorithm, described in
[Michaud-Agrawal2011]_. It can identify the lipids in a bilayer of
arbitrary shape and topology, including planar and undulating bilayers
under periodic boundary conditions or vesicles.

One can use this information to identify

* the upper and lower leaflet of a *planar membrane* by comparing the
  the :meth:`~MDAnalysis.core.groups.AtomGroup.center_of_geometry` of
  the leaflet groups, or

* the outer and inner leaflet of a *vesicle* by comparing histograms
  of distances from the centre of geometry (or possibly simply the
  :meth:`~MDAnalysis.core.groups.AtomGroup.radius_of_gyration`).

See example scripts in the MDAnalysisCookbook_ on how to use
:class:`LeafletFinder`. The function :func:`optimize_cutoff` implements a
(slow) heuristic method to find the best cut off for the LeafletFinder
algorithm.

.. _MDAnalysisCookbook: https://github.com/MDAnalysis/MDAnalysisCookbook/tree/master/examples


Algorithm
---------

1. build a graph of all phosphate distances < cutoff
2. identify the largest connected subgraphs
3. analyse first and second largest graph, which correspond to the leaflets

For further details see [Michaud-Agrawal2011]_.


Classes and Functions
---------------------

.. autoclass:: LeafletFinder
   :members:

.. autofunction:: optimize_cutoff

"""
import warnings

import numpy as np

from .. import core
from . import distances
from .. import selections
from ..due import due, Doi


# networkx is an optional import
try:
    import networkx as NX
except ImportError:
    HAS_NX = False
else:
    HAS_NX = True


due.cite(
    Doi("10.1002/jcc.21787"),
    description="LeafletFinder algorithm",
    path="MDAnalysis.analysis.leaflet",
    cite_module=True,
)
del Doi


class LeafletFinder(object):
    """Identify atoms in the same leaflet of a lipid bilayer.

    This class implements the *LeafletFinder* algorithm [Michaud-Agrawal2011]_.

    Parameters
    ----------
    universe : Universe
        :class:`~MDAnalysis.core.universe.Universe` object.
    select : AtomGroup or str
        A AtomGroup instance or a
        :meth:`Universe.select_atoms` selection string
        for atoms that define the lipid head groups, e.g.
        universe.atoms.PO4 or "name PO4" or "name P*"
    cutoff : float (optional)
        head group-defining atoms within a distance of `cutoff`
        Angstroms are deemed to be in the same leaflet [15.0]
    pbc : bool (optional)
        take periodic boundary conditions into account [``False``]
    sparse : bool (optional)
        ``None``: use fastest possible routine; ``True``: use slow
        sparse matrix implementation (for large systems); ``False``:
        use fast :func:`~MDAnalysis.lib.distances.distance_array`
        implementation [``None``].

    Raises
    ------
    ImportError
      This class requires the optional package `networkx`. If not found
      an ImportError is raised.

    Example
    -------
    The components of the graph are stored in the list
    :attr:`LeafletFinder.components`; the atoms in each component are numbered
    consecutively, starting at 0. To obtain the atoms in the input structure
    use :meth:`LeafletFinder.groups`::

       u = mda.Universe(PDB)
       L = LeafletFinder(u, 'name P*')
       leaflet0 = L.groups(0)
       leaflet1 = L.groups(1)

    The residues can be accessed through the standard MDAnalysis mechanism::

       leaflet0.residues

    provides a :class:`~MDAnalysis.core.groups.ResidueGroup`
    instance. Similarly, all atoms in the first leaflet are then ::

       leaflet0.residues.atoms


    .. versionchanged:: 1.0.0
       Changed `selection` keyword to `select`
    .. versionchanged:: 2.0.0
       The universe keyword no longer accepts non-Universe arguments. Please
       create a :class:`~MDAnalysis.core.universe.Universe` first.
    """

    def __init__(self, universe, select, cutoff=15.0, pbc=False, sparse=None):
        # Raise an error if networkx is not installed
        if not HAS_NX:
            errmsg = (
                "The LeafletFinder class requires an installation of "
                "networkx. Please install networkx "
                "https://networkx.org/documentation/stable/install.html"
            )
            raise ImportError(errmsg)

        self.universe = universe
        self.selectionstring = select
        if isinstance(self.selectionstring, core.groups.AtomGroup):
            self.selection = self.selectionstring
        else:
            self.selection = universe.select_atoms(self.selectionstring)
        self.pbc = pbc
        self.sparse = sparse
        self._init_graph(cutoff)

    def _init_graph(self, cutoff):
        self.cutoff = cutoff
        self.graph = self._get_graph()
        self.components = self._get_components()

    # The last two calls in _get_graph() and the single line in
    # _get_components() are all that are needed to make the leaflet
    # detection work.

    def _get_graph(self):
        """Build graph from adjacency matrix at the given cutoff.
        Automatically select between high and low memory usage versions of
        contact_matrix."""
        # could use self_distance_array to speed up but then need to deal with the sparse indexing
        if self.pbc:
            box = self.universe.trajectory.ts.dimensions
        else:
            box = None
        coord = self.selection.positions
        if self.sparse is False:
            # only try distance array
            try:
                adj = distances.contact_matrix(
                    coord, cutoff=self.cutoff, returntype="numpy", box=box
                )
            except ValueError:  # pragma: no cover
                warnings.warn(
                    "N x N matrix too big, use sparse=True or sparse=None",
                    category=UserWarning,
                    stacklevel=2,
                )
                raise
        elif self.sparse is True:
            # only try sparse
            adj = distances.contact_matrix(
                coord, cutoff=self.cutoff, returntype="sparse", box=box
            )
        else:
            # use distance_array and fall back to sparse matrix
            try:
                # this works for small-ish systems and depends on system memory
                adj = distances.contact_matrix(
                    coord, cutoff=self.cutoff, returntype="numpy", box=box
                )
            except ValueError:  # pragma: no cover
                # but use a sparse matrix method for larger systems for memory reasons
                warnings.warn(
                    "N x N matrix too big - switching to sparse matrix method (works fine, but is currently rather "
                    "slow)",
                    category=UserWarning,
                    stacklevel=2,
                )
                adj = distances.contact_matrix(
                    coord, cutoff=self.cutoff, returntype="sparse", box=box
                )
        return NX.Graph(adj)

    def _get_components(self):
        """Return connected components (as sorted numpy arrays), sorted by size."""
        return [
            np.sort(list(component))
            for component in NX.connected_components(self.graph)
        ]

    def update(self, cutoff=None):
        """Update components, possibly with a different *cutoff*"""
        if cutoff is None:
            cutoff = self.cutoff
        self._init_graph(cutoff)

    def sizes(self):
        """Dict of component index with size of component."""
        return dict(
            (
                (idx, len(component))
                for idx, component in enumerate(self.components)
            )
        )

    def groups(self, component_index=None):
        """Return a :class:`MDAnalysis.core.groups.AtomGroup` for *component_index*.

        If no argument is supplied, then a list of all leaflet groups is returned.

        See Also
        --------
        :meth:`LeafletFinder.group`
        :meth:`LeafletFinder.groups_iter`
        """
        if component_index is None:
            return list(self.groups_iter())
        else:
            return self.group(component_index)

    def group(self, component_index):
        """Return a :class:`MDAnalysis.core.groups.AtomGroup` for *component_index*."""
        # maybe cache this?
        indices = [i for i in self.components[component_index]]
        return self.selection[indices]

    def groups_iter(self):
        """Iterator over all leaflet :meth:`groups`"""
        for component_index in range(len(self.components)):
            yield self.group(component_index)

    def write_selection(self, filename, **kwargs):
        """Write selections for the leaflets to *filename*.

        The format is typically determined by the extension of *filename*
        (e.g. "vmd", "pml", or "ndx" for VMD, PyMol, or Gromacs).

        See :class:`MDAnalysis.selections.base.SelectionWriter` for all
        options.
        """
        sw = selections.get_writer(filename, kwargs.pop("format", None))
        with sw(
            filename,
            mode=kwargs.pop("mode", "w"),
            preamble="leaflets based on select={selectionstring!r} cutoff={cutoff:f}\n".format(
                **vars(self)
            ),
            **kwargs,
        ) as writer:
            for i, ag in enumerate(self.groups_iter()):
                name = "leaflet_{0:d}".format((i + 1))
                writer.write(ag, name=name)

    def __repr__(self):
        return "<LeafletFinder({0!r}, cutoff={1:.1f} A) with {2:d} atoms in {3:d} groups>".format(
            self.selectionstring,
            self.cutoff,
            self.selection.n_atoms,
            len(self.components),
        )


def optimize_cutoff(
    universe,
    select,
    dmin=10.0,
    dmax=20.0,
    step=0.5,
    max_imbalance=0.2,
    **kwargs,
):
    r"""Find cutoff that minimizes number of disconnected groups.

    Applies heuristics to find best groups:

    1. at least two groups (assumes that there are at least 2 leaflets)
    2. reject any solutions for which:

       .. math::

              \frac{|N_0 - N_1|}{|N_0 + N_1|} > \mathrm{max_imbalance}

       with :math:`N_i` being the number of lipids in group
       :math:`i`. This heuristic picks groups with balanced numbers of
       lipids.

    Parameters
    ----------
    universe : Universe
        :class:`MDAnalysis.Universe` instance
    select : AtomGroup or str
        AtomGroup or selection string as used for :class:`LeafletFinder`
    dmin : float (optional)
    dmax : float (optional)
    step : float (optional)
        scan cutoffs from `dmin` to `dmax` at stepsize `step` (in Angstroms)
    max_imbalance : float (optional)
        tuning parameter for the balancing heuristic [0.2]
    kwargs : other keyword arguments
        other arguments for  :class:`LeafletFinder`

    Returns
    -------
    (cutoff, N)
         optimum cutoff and number of groups found


    .. Note:: This function can die in various ways if really no
              appropriate number of groups can be found; it ought  to be
              made more robust.

    .. versionchanged:: 1.0.0
       Changed `selection` keyword to `select`
    """
    kwargs.pop("cutoff", None)  # not used, so we filter it
    _sizes = []
    for cutoff in np.arange(dmin, dmax, step):
        LF = LeafletFinder(universe, select, cutoff=cutoff, **kwargs)
        # heuristic:
        #  1) N > 1
        #  2) no imbalance between large groups:
        sizes = LF.sizes()
        if len(sizes) < 2:
            continue
        n0 = float(sizes[0])  # sizes of two biggest groups ...
        n1 = float(sizes[1])  # ... assumed to be the leaflets
        imbalance = np.abs(n0 - n1) / (n0 + n1)
        # print "sizes: %(sizes)r; imbalance=%(imbalance)f" % vars()
        if imbalance > max_imbalance:
            continue
        _sizes.append((cutoff, len(LF.sizes())))
    results = np.rec.fromrecords(_sizes, names="cutoff,N")
    del _sizes
    results.sort(order=["N", "cutoff"])  # sort ascending by N, then cutoff
    return results[0]  # (cutoff,N) with N>1 and shortest cutoff