File: base.py

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
Base topology reader classes --- :mod:`MDAnalysis.topology.base`
================================================================

Derive topology reader classes from the base class in this module. All
topology readers raise :exc:`IOError` upon failing to read a topology
file and :exc:`ValueError` upon failing to make sense of the read data.

Classes
-------

.. autoclass:: TopologyReaderBase
   :members:
   :inherited-members:

"""
from functools import reduce

import itertools
import numpy as np
import warnings

from .. import _PARSERS, _PARSER_HINTS
from ..coordinates.base import IOBase
from ..lib import util


class _Topologymeta(type):
    """Internal: Topology Parser registration voodoo

    When classes which inherit from TopologyReaderBase are *defined*
    this metaclass makes it known to MDAnalysis.  The optional `format`
    attribute and `_format_hint` staticmethod are read:
     - `format` defines the file extension this Parser targets.
     - `_format_hint` defines a function which returns a boolean indicating
       whether the Parser can process a particular object.

    Eg::

      class ThingParser(TopologyReaderBase):
          format = ['foo', 'bar']

          @staticmethod
          def _format_hint(thing):
              try:
                  import WeirdPackage
              except ImportError:
                  return False
              return isinstance(thing, WeirdPackage.Thing)

    This way there is no strict dependency on "WeirdPackage", but if
    a user supplies a WeirdPackage.Thing the "ThingParser" will be able
    to step up and read it.

    .. versionchanged:: 1.0.0
       Added format_hint functionality
    """

    def __init__(cls, name, bases, classdict):
        type.__init__(cls, name, bases, classdict)
        try:
            fmt = util.asiterable(classdict["format"])
        except KeyError:
            pass
        else:
            for fmt_name in fmt:
                fmt_name = fmt_name.upper()
                _PARSERS[fmt_name] = cls

                if "_format_hint" in classdict:
                    _PARSER_HINTS[fmt_name] = classdict[
                        "_format_hint"
                    ].__func__


class TopologyReaderBase(IOBase, metaclass=_Topologymeta):
    """Base class for topology readers

    Parameters
    ----------
    filename : str
        name of the topology file
    universe : Universe, optional
        Supply a Universe to the Parser.  It is then passed on to the
        Atom instances that are created within parsers.

    All topology readers must define a `parse` method which
    returns a Topology object.

    Raises
    ------
    * :exc:`IOError` upon failing to read a topology file
    * :exc:`ValueError` upon failing to make sense of the read data

    .. versionadded:: 0.9.0
    .. versionchanged:: 0.9.2
       Added keyword 'universe' to pass to Atom creation.
    """

    def __init__(self, filename):
        self.filename = filename

    def parse(self, **kwargs):  # pragma: no cover
        raise NotImplementedError("Override this in each subclass")
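

# The sketch below is illustrative only and not part of MDAnalysis: the class
# and format name are hypothetical.  It shows the registration flow described
# in the ``_Topologymeta`` docstring: defining a subclass with a ``format``
# attribute (and, optionally, a ``_format_hint`` staticmethod) is all that is
# needed for it to appear in ``_PARSERS`` and ``_PARSER_HINTS``.
def _example_reader_registration():  # pragma: no cover
    """Illustrative sketch; nothing here is called by the library."""

    class ExampleTextParser(TopologyReaderBase):
        format = "EXTXT"

        @staticmethod
        def _format_hint(thing):
            # Accept plain strings ending in ".extxt"; everything else is
            # left to other parsers.
            return isinstance(thing, str) and thing.endswith(".extxt")

        def parse(self, **kwargs):
            # A real parser would read ``self.filename`` and return a
            # Topology object here.
            raise NotImplementedError

    # The metaclass registered the class (keyed by the upper-cased format)
    # as soon as the class body above was executed.
    assert _PARSERS["EXTXT"] is ExampleTextParser
    assert "EXTXT" in _PARSER_HINTS
    return ExampleTextParser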


def squash_by(child_parent_ids, *attributes):
    """Squash a child-parent relationship

    Parameters
    ----------
    child_parent_ids : numpy ndarray
        array of ids (unique values that identify the parent)
    *attributes : numpy ndarray
        other arrays that need to follow the sorting of ids

    Returns
    -------
    child_parents_idx : numpy ndarray
        array of length len(child) which points to the index of the parent
    parent_ids : numpy ndarray
        array of length len(parent) holding the sorted unique ids
    parent_attrs : list of numpy ndarray
        the other attributes, each reduced to length len(parent)
    """
    unique_resids, sort_mask, atom_idx = np.unique(
        child_parent_ids, return_index=True, return_inverse=True
    )

    return atom_idx, unique_resids, [attr[sort_mask] for attr in attributes]
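

# Usage sketch for squash_by (illustrative values only, not taken from any
# real topology): per-atom parent ids are collapsed to the sorted unique ids,
# and accompanying per-atom arrays are reduced in the same order.
def _example_squash_by():  # pragma: no cover
    resids = np.array([3, 3, 1, 1, 3])
    resnames = np.array(["ALA", "ALA", "GLY", "GLY", "ALA"])

    residx, unique_ids, (unique_names,) = squash_by(resids, resnames)
    # residx       -> array([1, 1, 0, 0, 1])  per-atom index into unique_ids
    # unique_ids   -> array([1, 3])           sorted unique parent ids
    # unique_names -> array(['GLY', 'ALA'])   resname of each unique parent
    return residx, unique_ids, unique_names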


def change_squash(criteria, to_squash):
    """Squash per atom data to per residue according to changes in resid

    Parameters
    ----------
    criteria : list of numpy ndarray
      Arrays which when changing indicate a new residue
    to_squash : list of numpy arrays
      Arrays which get squashed according to the criteria arrays

    Returns
    -------
    residx : numpy array
      The Residue *index* that each Atom gets assigned to. [len(resids)]
    squashed : numpy array
      The to_squash arrays reduced down to per Residue values


    Example
    -------
    resids = np.array([2, 2, 3, 3, 2, 2])
    resnames = np.array(['RsA', 'RsA', 'RsB', 'RsB', 'RsC', 'RsC'])
    segids = np.array(['A', 'A', 'A', 'A', 'B', 'B'])

    residx, (new_resids, new_resnames, new_segids) = change_squash(
        (resids,), (resids, resnames, segids))

    # Per atom res index
    residx: [0, 0, 1, 1, 2, 2]
    # Per residue record of each attribute
    new_resids: [2, 3, 2]
    new_resnames: ['RsA', 'RsB', 'RsC']
    new_segids: ['A', 'A', 'B']
    """

    def get_borders(*arrays):
        """Generator of indices to slice arrays when they change"""
        borders = np.nonzero(
            reduce(np.logical_or, (a[:-1] != a[1:] for a in arrays))
        )
        # Add Nones so we can slice from start to end
        return [None] + list(borders[0] + 1) + [None]

    l0 = len(criteria[0])
    if not all(
        len(other) == l0 for other in itertools.chain(criteria[1:], to_squash)
    ):
        raise ValueError("All arrays must be equally sized")

    # 1) Detect where resids change
    borders = get_borders(*criteria)
    # Number of groups = number of changes + 1
    # 2 `None`s have been added, so -1
    nres = len(borders) - 1

    # 2) Allocate new arrays
    # Per atom record of what residue they belong to
    residx = np.zeros_like(criteria[0], dtype=int)
    # Per residue record of various attributes
    new_others = [np.zeros(nres, dtype=o.dtype) for o in to_squash]

    # 3) Slice through resids and others to find values
    for i, (x, y) in enumerate(zip(borders[:-1], borders[1:])):
        residx[x:y] = i  # atoms between x & y are in the i'th residue
        for old, new in zip(to_squash, new_others):
            new[i] = old[x:y][0]  # TODO: Check that x:y is the same
            # Should be the same for self consistency...

    return residx, new_others
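

# Runnable version of the example in the change_squash docstring (same
# values); the names below are only illustrative.  Both ``criteria`` and
# ``to_squash`` are sequences of equally sized per-atom arrays.
def _example_change_squash():  # pragma: no cover
    resids = np.array([2, 2, 3, 3, 2, 2])
    resnames = np.array(["RsA", "RsA", "RsB", "RsB", "RsC", "RsC"])
    segids = np.array(["A", "A", "A", "A", "B", "B"])

    residx, (new_resids, new_resnames, new_segids) = change_squash(
        (resids,), (resids, resnames, segids)
    )
    # residx       -> array([0, 0, 1, 1, 2, 2])  per-atom residue index
    # new_resids   -> array([2, 3, 2])           per-residue resids
    # new_resnames -> array(['RsA', 'RsB', 'RsC'])
    # new_segids   -> array(['A', 'A', 'B'])
    return residx, new_resids, new_resnames, new_segids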


def reduce_singular(values):
    """Returns the value in an array of length 1, or
    the tuple of an array with a longer lengh.

    Parameters
    ----------
    values: array-like
        Array to squash

    Returns
    -------
    values: tuple or single value
    """
    if len(values) == 1:
        return values[0]
    else:
        return tuple(values)
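

# Usage sketch for reduce_singular (illustrative values): a length-1 array
# collapses to its only element, longer arrays become a tuple of elements.
def _example_reduce_singular():  # pragma: no cover
    single = reduce_singular(np.array([42]))        # -> 42 (a numpy scalar)
    several = reduce_singular(np.array([1, 2, 3]))  # -> tuple of the 3 elements
    return single, several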