File: ClusterCollection.py

package info (click to toggle)
mdanalysis 2.10.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 116,696 kB
  • sloc: python: 92,135; ansic: 8,156; makefile: 215; sh: 138
file content (275 lines) | stat: -rw-r--r-- 8,299 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
Cluster representation --- :mod:`MDAnalysis.analysis.encore.clustering.ClusterCollection`
=========================================================================================

The module contains the Cluster and ClusterCollection classes which are
designed to store results from clustering algorithms.

:Author: Matteo Tiberti, Wouter Boomsma, Tone Bengtsen

.. versionadded:: 0.16.0

.. deprecated:: 2.8.0
   This module is deprecated in favour of the 
   MDAKit `mdaencore <https://mdanalysis.org/mdaencore/>`_ and will be removed
   in MDAnalysis 3.0.0.

"""
import numpy as np


class Cluster(object):
    """
    Generic Cluster class for clusters with centroids.

    Attributes
    ----------

    id : int
        Cluster ID number. Useful for the ClusterCollection class

    metadata : dict
        dict mapping a label to a numpy.array holding one metadata value
        per cluster element. Every array has the same length as the
        number of elements that belong to the cluster.

    size : int
        number of elements.

    centroid : element object
        cluster centroid.

    elements : numpy.array
        array containing the cluster elements.
    """

    def __init__(self, elem_list=None, centroid=None, idn=None, metadata=None):
        """Class constructor. If elem_list is None, an empty cluster is created
            and the remaining arguments (except idn) are ignored.

        Parameters
        ----------

        elem_list : numpy.array or None
            numpy array of cluster elements

        centroid : None or element object
            centroid; it must be one of the values in elem_list

        idn : int
            cluster ID

        metadata : dict or None
            metadata as {label: values}, one value for each cluster element.
            Each values sequence must have the same length as the elements
            array.

        Raises
        ------

        LookupError
            if the centroid is not found in elem_list

        TypeError
            if a metadata entry does not have one value per cluster element

        """

        self.id = idn
        self.metadata = {}

        if elem_list is None:
            # Empty cluster: no elements, no centroid, no metadata.
            self.size = 0
            self.elements = np.array([])
            self.centroid = None
            return

        self.elements = elem_list
        if centroid not in self.elements:
            raise LookupError(
                "Centroid of cluster not found in the element list"
            )

        self.centroid = centroid
        self.size = self.elements.shape[0]
        if metadata:
            for name, data in metadata.items():
                # Checked here (in addition to add_metadata) so that the
                # error message can name the offending label.
                if len(data) != self.size:
                    raise TypeError(
                        'Size of metadata having label "{0}" '
                        "is not equal to the number of cluster "
                        "elements".format(name)
                    )
                # Must run once per label: storing only after the loop
                # would silently drop every entry but the last one.
                self.add_metadata(name, data)

    def __iter__(self):
        """
        Iterate over elements in cluster
        """
        return iter(self.elements)

    def __len__(self):
        """
        Size of cluster
        """
        return len(self.elements)

    def add_metadata(self, name, data):
        """
        Attach a metadata entry to the cluster.

        Parameters
        ----------

        name : hashable
            label under which the metadata is stored in ``self.metadata``;
            an existing entry with the same label is replaced

        data : sequence
            one value per cluster element; stored as a numpy.array

        Raises
        ------

        TypeError
            if the length of data differs from the cluster size
        """
        if len(data) != self.size:
            raise TypeError(
                "Size of metadata is not equal to the number of "
                "cluster elements"
            )
        self.metadata[name] = np.array(data)

    def __repr__(self):
        """
        Textual representation
        """
        if self.size == 0:
            return "<Cluster with no elements>"
        else:
            return "<Cluster with {0} elements, centroid={1}, id={2}>".format(
                self.size, self.centroid, self.id
            )


class ClusterCollection(object):
    """Clusters collection class; this class represents the results of a full
    clustering run. It stores a group of clusters defined as
    encore.clustering.Cluster objects.

    Attributes
    ----------

    clusters : list or None
        list of Cluster objects which are part of the Cluster collection;
        None when the collection was created without elements

    """

    def __init__(self, elements=None, metadata=None):
        """Class constructor. If elements is None, an empty cluster collection
        will be created. Otherwise, the constructor takes as input an
        iterable of ints, for instance:

        [ a, a, a, a, b, b, b, c, c, ... , z, z ]

        the variables a,b,c,...,z are cluster centroids, here as cluster
        element numbers (i.e. 3 means the 4th element of the ordered input
        for clustering). The array maps a correspondence between
        cluster elements (which are implicitly associated with the
        position in the array) with centroids, i. e. defines clusters.
        For instance:

        [ 1, 1, 1, 4, 4, 5 ]

        means that elements 0, 1, 2 form a cluster which has 1 as centroid,
        elements 3 and 4 form a cluster which has 4 as centroid, and
        element 5 has its own cluster.


        Parameters
        ----------

        elements : iterable of ints or None
            clustering results. See the previous description for details

        metadata : {str:list, str:list,...} or None
            metadata for the data elements. The list must be of the same
            size as the elements array, with one value per element.

        Raises
        ------

        TypeError
            if the elements do not all have the same type

        ValueError
            if a centroid is not assigned to its own cluster

        """
        if elements is None:
            # Kept as None (not an empty list) so existing code that tests
            # ``collection.clusters is None`` keeps working.
            self.clusters = None
            return

        if not len(set((type(el) for el in elements))) == 1:
            raise TypeError("all the elements must have the same type")
        self.clusters = []
        elements_array = np.array(elements)
        centroids = np.unique(elements_array)
        for i in centroids:
            # A centroid is itself an element, so the entry at its own
            # position must point back to it.
            if elements[i] != i:
                raise ValueError(
                    "element {0}, which is a centroid, doesn't "
                    "belong to its own cluster".format(i)
                )
        # Cluster ids are assigned sequentially, following the sorted order
        # of the centroids returned by np.unique.
        for idn, c in enumerate(centroids):
            this_array = np.where(elements_array == c)
            this_metadata = {}
            if metadata:
                # Slice every metadata list down to this cluster's members.
                for k, v in metadata.items():
                    this_metadata[k] = np.asarray(v)[this_array]
            self.clusters.append(
                Cluster(
                    elem_list=this_array[0],
                    idn=idn,
                    centroid=c,
                    metadata=this_metadata,
                )
            )

    def get_ids(self):
        """
        Get the ID numbers of the clusters

        Returns
        -------

        ids : list of int
        list of cluster ids (empty for an empty collection)
        """
        return [v.id for v in self.clusters or ()]

    def get_centroids(self):
        """
        Get the centroids of the clusters

        Returns
        -------

        centroids : list of cluster element objects
        list of cluster centroids (empty for an empty collection)
        """

        return [v.centroid for v in self.clusters or ()]

    def __iter__(self):
        """
        Iterate over clusters; an empty collection yields nothing

        """
        return iter(self.clusters or ())

    def __len__(self):
        """
        Length of clustering collection; 0 for an empty collection
        """
        return len(self.clusters or ())

    def __repr__(self):
        """
        Textual representation
        """
        if self.clusters is None:
            return "<ClusterCollection with no clusters>"
        else:
            return "<ClusterCollection with {0} clusters>".format(
                len(self.clusters)
            )