File: streamlines.py

package info (click to toggle)
mdanalysis 2.10.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 116,696 kB
  • sloc: python: 92,135; ansic: 8,156; makefile: 215; sh: 138
file content (492 lines) | stat: -rw-r--r-- 19,359 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""Streamplots (2D)  --- :mod:`MDAnalysis.visualization.streamlines`
=================================================================

:Authors: Tyler Reddy and Matthieu Chavent
:Year: 2014
:Copyright: Lesser GNU Public License v2.1+


The :func:`generate_streamlines` function can generate a 2D flow field from a
MD trajectory, for instance, lipid molecules in a flat membrane. It can make
use of multiple cores to perform the analysis in parallel (using
:mod:`multiprocessing`).

See Also
--------
MDAnalysis.visualization.streamlines_3D : streamplots in 3D


.. autofunction:: generate_streamlines

"""
import multiprocessing

import numpy as np
import scipy

try:
    import matplotlib
    import matplotlib.path
except ImportError:
    raise ImportError(
        "2d streamplot module requires: matplotlib.path for its "
        "path.Path.contains_points method. The installation "
        "instructions for the matplotlib module can be found here: "
        "http://matplotlib.org/faq/installing_faq.html?highlight=install"
    ) from None

import MDAnalysis


def produce_grid(tuple_of_limits, grid_spacing):
    """Produce a 2D grid for the simulation system.

    The grid covers the Cartesian coordinate limits determined in an
    earlier step, with one grid line every `grid_spacing` ångström.

    Parameters
    ----------
    tuple_of_limits : tuple
        ``x_min, x_max, y_min, y_max``
    grid_spacing : float
        grid size in all directions in ångström

    Returns
    -------
    grid : array
       ``numpy.mgrid[x_min:x_max:grid_spacing, y_min:y_max:grid_spacing]``
    """
    x_lower, x_upper, y_lower, y_upper = tuple_of_limits
    return np.mgrid[x_lower:x_upper:grid_spacing, y_lower:y_upper:grid_spacing]


def split_grid(grid, num_cores):
    """Split the grid into blocks of vertices.

    Take the overall `grid` for the system and split it into lists of
    square vertices that can be distributed to each core.

    Parameters
    ----------
    grid : numpy.array
        2D array as produced by :func:`produce_grid`
    num_cores : int
        number of partitions to generate

    Returns
    -------
    list_square_vertex_arrays_per_core : array of arrays
         the full list of square vertices
         ``[[v1, v2, v3, v4], [v1, v2, v3, v4], ...]`` split into roughly
         equally-sized sublists, one per available core
    list_parent_index_values : array of arrays
         matching ``[[row, column], [row, column], ...]`` grid positions for
         each core, split the same way
    current_row : int
         last row + 1
    current_column : int
         last column + 1

    Note
    ----
    Limited to 2D for now.

    """
    x_array, y_array = grid
    # stack the x and y coordinate planes so that vertex_coords[column, row]
    # is the (x, y) position of a single grid vertex; the overall shape is
    # (N_columns_in_grid, N_rows_in_a_column, 2)
    vertex_coords = np.dstack((x_array, y_array))
    n_columns = vertex_coords.shape[0]
    n_rows = vertex_coords.shape[1]
    # although a pure numpy/vectorised solution would be preferable
    # eventually, plain loops keep the square construction easy to follow
    all_square_vertices = []  # nested list of the 4 vertices of every square
    parent_index_values = []  # ordered [row, column] assignment indices for
    # reconstructing grid positions in the parent process
    # the last column has no right neighbour and the top row has no row above
    # it, so neither can seed a square of its own:
    for column in range(n_columns - 1):
        for row in range(n_rows - 1):
            all_square_vertices.append(
                [
                    vertex_coords[column, row],  # bottom left
                    vertex_coords[column + 1, row],  # bottom right
                    vertex_coords[column + 1, row + 1],  # top right
                    vertex_coords[column, row + 1],  # top left
                ]
            )
            parent_index_values.append([row, column])
    # distribute the squares [[v1,v2,v3,v4],...] and their grid positions
    # across the cores in roughly equally-sized chunks:
    return [
        np.array_split(all_square_vertices, num_cores),
        np.array_split(parent_index_values, num_cores),
        n_rows - 1,
        n_columns - 1,
    ]


# some private utility functions for trajectory iteration
def _produce_list_indices_point_in_polygon_this_frame(
    vertex_coord_list, relevant_particle_coordinate_array_xy
):
    """
    Produce a list of indices for particles of interest in the current frame.

    One entry per square in the grid, in grid order: each entry is the tuple
    of indices (as returned by :func:`numpy.where`) of the particles whose
    xy coordinates fall inside that square. The list therefore has the same
    length as the number of squares in the grid.
    """
    # a matplotlib Path per square provides the point-in-polygon test
    return [
        np.where(
            matplotlib.path.Path(square_vertices).contains_points(
                relevant_particle_coordinate_array_xy
            )
        )
        for square_vertices in vertex_coord_list
    ]


def _produce_list_centroids_this_frame(
    list_indices_in_polygon, relevant_particle_coordinate_array_xy
):
    """
    Produce a list of centroids for the particles in the current frame.
    The list is ordered according to the order of the squares in the grid.
    Each entry in the list is a numpy array of the centroid coordinates for the particles in that square.
    The list is the same length as the number of squares in the grid.
    If there are no particles in a square, the entry is None.
    """
    list_centroids_this_frame = []
    for indices in list_indices_in_polygon:
        if (
            not indices[0].size > 0
        ):  # if there are no particles of interest in this particular square
            list_centroids_this_frame.append(None)
        else:
            current_coordinate_array_in_square = (
                relevant_particle_coordinate_array_xy[indices]
            )
            current_square_indices_centroid = np.average(
                current_coordinate_array_in_square, axis=0
            )
            list_centroids_this_frame.append(current_square_indices_centroid)
    return list_centroids_this_frame  # a list of numpy xy centroid arrays for this frame


def per_core_work(
    topology_file_path,
    trajectory_file_path,
    list_square_vertex_arrays_this_core,
    MDA_selection,
    start_frame,
    end_frame,
    reconstruction_index_list,
    maximum_delta_magnitude,
):
    """Run the analysis on one core.

    The code to perform on a given core given the list of square vertices assigned to it.

    Parameters
    ----------
    topology_file_path : str
        absolute path to the topology file used to build the Universe
    trajectory_file_path : str
        absolute path to the trajectory file
    list_square_vertex_arrays_this_core : array
        sublist of grid squares (each an array of 4 xy vertices) assigned to
        this core by :func:`split_grid`
    MDA_selection : str
        MDAnalysis selection string for the particles of interest
    start_frame : int
        frames with ``ts.frame < start_frame`` are skipped
    end_frame : int
        iteration stops once ``ts.frame > end_frame`` (see NOTE below)
    reconstruction_index_list : array
        ``[row, column]`` grid positions matching the square sublist, used by
        the parent process to place results back into the full grid
    maximum_delta_magnitude : float
        centroid displacements are clipped to
        ``[-maximum_delta_magnitude, maximum_delta_magnitude]`` as a crude
        guard against periodic-boundary jumps

    Returns
    -------
    list
        ``[([row, column], [dx, dy]), ...]`` pairing each grid position with
        the clipped centroid displacement between the final pair of frames
        analysed

    Note
    ----
    ``xy_deltas_to_write`` is only assigned inside the frame loop, so at
    least two consecutive frames with particles in at least one square must
    be processed or the final ``return`` raises ``NameError``.
    NOTE(review): only the displacement of the *last* frame pair survives to
    the return value — confirm this is the intended contract with the caller.
    """
    # obtain the relevant coordinates for particles of interest
    universe_object = MDAnalysis.Universe(
        topology_file_path, trajectory_file_path
    )
    # per-frame state carried over so consecutive frames can be compared
    list_previous_frame_centroids = []
    list_previous_frame_indices = []
    for ts in universe_object.trajectory:
        if ts.frame < start_frame:  # don't start until first specified frame
            continue
        # only 2D / xy coords for now: [..., :-1] drops the z component
        relevant_particle_coordinate_array_xy = universe_object.select_atoms(
            MDA_selection
        ).positions[..., :-1]
        # only 2D / xy coords for now
        # I will need a list of indices for relevant particles falling within each square in THIS frame:
        list_indices_in_squares_this_frame = (
            _produce_list_indices_point_in_polygon_this_frame(
                list_square_vertex_arrays_this_core,
                relevant_particle_coordinate_array_xy,
            )
        )
        # likewise, I will need a list of centroids of particles in each square (same order as above list):
        list_centroids_in_squares_this_frame = (
            _produce_list_centroids_this_frame(
                list_indices_in_squares_this_frame,
                relevant_particle_coordinate_array_xy,
            )
        )
        if (
            list_previous_frame_indices
        ):  # if the previous frame had indices in at least one square I will need to use
            #  those indices to generate the updates to the corresponding centroids in this frame:
            # i.e. follow the SAME particles from last frame into this frame
            list_centroids_this_frame_using_indices_from_last_frame = (
                _produce_list_centroids_this_frame(
                    list_previous_frame_indices,
                    relevant_particle_coordinate_array_xy,
                )
            )
            # I need to write a velocity of zero if there are any 'empty' squares in either frame:
            xy_deltas_to_write = []
            for square_1_centroid, square_2_centroid in zip(
                list_centroids_this_frame_using_indices_from_last_frame,
                list_previous_frame_centroids,
            ):
                if square_1_centroid is None or square_2_centroid is None:
                    xy_deltas_to_write.append([0, 0])
                else:
                    xy_deltas_to_write.append(
                        np.subtract(
                            square_1_centroid, square_2_centroid
                        ).tolist()
                    )

            # xy_deltas_to_write = np.subtract(np.array(
            # list_centroids_this_frame_using_indices_from_last_frame),np.array(list_previous_frame_centroids))
            xy_deltas_to_write = np.array(xy_deltas_to_write)
            # now filter the array to only contain distances in the range [-8,8] as a placeholder for dealing with PBC
            #  issues (Matthieu seemed to use a limit of 8 as well);
            xy_deltas_to_write = np.clip(
                xy_deltas_to_write,
                -maximum_delta_magnitude,
                maximum_delta_magnitude,
            )

            # with the xy and dx,dy values calculated I need to set the values from this frame to previous frame
            # values in anticipation of the next frame:
            list_previous_frame_centroids = (
                list_centroids_in_squares_this_frame[:]
            )
            list_previous_frame_indices = list_indices_in_squares_this_frame[:]
        else:  # either no points in squares or after the first frame I'll just reset the 'previous' values so they
            # can be used when consecutive frames have proper values
            list_previous_frame_centroids = (
                list_centroids_in_squares_this_frame[:]
            )
            list_previous_frame_indices = list_indices_in_squares_this_frame[:]
        if ts.frame > end_frame:
            # NOTE(review): this check runs AFTER the frame has been analysed,
            # so frame end_frame + 1 is fully processed before the loop stops
            # — confirm this off-by-one is intended
            break  # stop here
    return list(zip(reconstruction_index_list, xy_deltas_to_write.tolist()))


def generate_streamlines(
    topology_file_path,
    trajectory_file_path,
    grid_spacing,
    MDA_selection,
    start_frame,
    end_frame,
    xmin,
    xmax,
    ymin,
    ymax,
    maximum_delta_magnitude,
    num_cores="maximum",
):
    r"""Produce the x and y components of a 2D streamplot data set.

    Parameters
    ----------
    topology_file_path : str
            Absolute path to the topology file
    trajectory_file_path : str
            Absolute path to the trajectory file. It will normally be desirable
            to filter the trajectory with a tool such as GROMACS
            :program:`g_filter` (see :footcite:p:`Chavent2014`)
    grid_spacing : float
            The spacing between grid lines (angstroms)
    MDA_selection : str
            MDAnalysis selection string
    start_frame : int
            First frame number to parse
    end_frame : int
            Last frame number to parse
    xmin : float
            Minimum coordinate boundary for x-axis (angstroms)
    xmax : float
            Maximum coordinate boundary for x-axis (angstroms)
    ymin : float
            Minimum coordinate boundary for y-axis (angstroms)
    ymax : float
            Maximum coordinate boundary for y-axis (angstroms)
    maximum_delta_magnitude : float
            Absolute value of the largest displacement tolerated for the
            centroid of a group of particles ( angstroms). Values above this
            displacement will not count in the streamplot (treated as
            excessively large displacements crossing the periodic boundary)
    num_cores : int or 'maximum' (optional)
            The number of cores to use. (Default 'maximum' uses all available
            cores)

    Returns
    -------
    dx_array : array of floats
            An array object containing the displacements in the x direction
    dy_array : array of floats
            An array object containing the displacements in the y direction
    average_displacement : float
            :math:`\frac{\sum\sqrt[]{dx^2 + dy^2}}{N}`
    standard_deviation_of_displacement : float
            standard deviation of :math:`\sqrt[]{dx^2 + dy^2}`

    Examples
    --------
    Generate 2D streamlines and plot::

        import matplotlib, matplotlib.pyplot, np
        import MDAnalysis, MDAnalysis.visualization.streamlines

        u1, v1, average_displacement, standard_deviation_of_displacement =
            MDAnalysis.visualization.streamlines.generate_streamlines('testing.gro', 'testing_filtered.xtc',
                    grid_spacing=20, MDA_selection='name PO4', start_frame=2, end_frame=3,
                    xmin=-8.73000049591, xmax= 1225.96008301,
                    ymin= -12.5799999237, ymax=1224.34008789,
                    maximum_delta_magnitude=1.0, num_cores=16)
        x = np.linspace(0, 1200, 61)
        y = np.linspace(0, 1200, 61)
        speed = np.sqrt(u1*u1 + v1*v1)
        fig = matplotlib.pyplot.figure()
        ax = fig.add_subplot(111, aspect='equal')
        ax.set_xlabel('x ($\AA$)')
        ax.set_ylabel('y ($\AA$)')
        ax.streamplot(x, y, u1, v1, density=(10,10), color=speed, linewidth=3*speed/speed.max())
        fig.savefig('testing_streamline.png',dpi=300)

    .. image:: testing_streamline.png


    References
    ----------
    .. footbibliography::


    See Also
    --------
    MDAnalysis.visualization.streamlines_3D.generate_streamlines_3d

    """
    # work out the number of cores to use:
    if num_cores == "maximum":
        num_cores = multiprocessing.cpu_count()  # use all available cores
    else:
        num_cores = num_cores  # use the value specified by the user
        # assert isinstance(num_cores,(int,long)), "The number of specified cores must (of course) be an integer."
    # NOTE(review): np.seterr mutates numpy's process-global error state and
    # is not restored before returning — confirm callers tolerate this
    np.seterr(all="warn", over="raise")
    parent_list_deltas = []  # collect all data from child processes here

    def log_result_to_parent(delta_array):
        # callback executed in the parent for each finished worker; results
        # arrive in completion order, but every delta carries its own
        # [row, column] index so ordering does not matter
        parent_list_deltas.extend(delta_array)

    tuple_of_limits = (xmin, xmax, ymin, ymax)
    grid = produce_grid(
        tuple_of_limits=tuple_of_limits, grid_spacing=grid_spacing
    )
    # partition the grid squares (and their [row, column] positions) into one
    # chunk per core
    (
        list_square_vertex_arrays_per_core,
        list_parent_index_values,
        total_rows,
        total_columns,
    ) = split_grid(grid=grid, num_cores=num_cores)
    # each worker re-opens the trajectory itself, so only paths and plain
    # arrays are sent to the pool
    pool = multiprocessing.Pool(num_cores)
    for vertex_sublist, index_sublist in zip(
        list_square_vertex_arrays_per_core, list_parent_index_values
    ):
        pool.apply_async(
            per_core_work,
            args=(
                topology_file_path,
                trajectory_file_path,
                vertex_sublist,
                MDA_selection,
                start_frame,
                end_frame,
                index_sublist,
                maximum_delta_magnitude,
            ),
            callback=log_result_to_parent,
        )
    pool.close()
    pool.join()
    dx_array = np.zeros((total_rows, total_columns))
    dy_array = np.zeros((total_rows, total_columns))
    # the parent_list_deltas is shaped like this: [ ([row_index,column_index],[dx,dy]), ... (...),...,]
    for (
        index_array,
        delta_array,
    ) in (
        parent_list_deltas
    ):  # go through the list in the parent process and assign to the
        #  appropriate positions in the dx and dy matrices:
        # build in a filter to replace all values at the cap (currently between -8,8) with 0 to match Matthieu's code
        # (I think eventually we'll reduce the cap to a narrower boundary though)
        index_1 = index_array.tolist()[0]
        index_2 = index_array.tolist()[1]
        # displacements that were clipped to exactly +/- the cap are treated
        # as periodic-boundary artefacts and zeroed out
        if abs(delta_array[0]) == maximum_delta_magnitude:
            dx_array[index_1, index_2] = 0
        else:
            dx_array[index_1, index_2] = delta_array[0]
        if abs(delta_array[1]) == maximum_delta_magnitude:
            dy_array[index_1, index_2] = 0
        else:
            dy_array[index_1, index_2] = delta_array[1]

    # at Matthieu's request, we now want to calculate the average and standard deviation of the displacement values:
    displacement_array = np.sqrt(dx_array**2 + dy_array**2)
    average_displacement = np.average(displacement_array)
    standard_deviation_of_displacement = np.std(displacement_array)

    return (
        dx_array,
        dy_array,
        average_displacement,
        standard_deviation_of_displacement,
    )