1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492
|
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""Streamplots (2D) --- :mod:`MDAnalysis.visualization.streamlines`
=================================================================
:Authors: Tyler Reddy and Matthieu Chavent
:Year: 2014
:Copyright: Lesser GNU Public License v2.1+
The :func:`generate_streamlines` function can generate a 2D flow field from a
MD trajectory, for instance, lipid molecules in a flat membrane. It can make
use of multiple cores to perform the analyis in parallel (using
:mod:`multiprocessing`).
See Also
--------
MDAnalysis.visualization.streamlines_3D : streamplots in 3D
.. autofunction:: generate_streamlines
"""
import multiprocessing
import numpy as np
import scipy
try:
import matplotlib
import matplotlib.path
except ImportError:
raise ImportError(
"2d streamplot module requires: matplotlib.path for its "
"path.Path.contains_points method. The installation "
"instructions for the matplotlib module can be found here: "
"http://matplotlib.org/faq/installing_faq.html?highlight=install"
) from None
import MDAnalysis
def produce_grid(tuple_of_limits, grid_spacing):
"""Produce a 2D grid for the simulation system.
The grid is based on the tuple of Cartesian Coordinate limits calculated in
an earlier step.
Parameters
----------
tuple_of_limits : tuple
``x_min, x_max, y_min, y_max``
grid_spacing : float
grid size in all directions in ångström
Returns
-------
grid : array
``numpy.mgrid[x_min:x_max:grid_spacing, y_min:y_max:grid_spacing]``
"""
x_min, x_max, y_min, y_max = tuple_of_limits
grid = np.mgrid[x_min:x_max:grid_spacing, y_min:y_max:grid_spacing]
return grid
def split_grid(grid, num_cores):
"""Split the grid into blocks of vertices.
Take the overall `grid` for the system and split it into lists of
square vertices that can be distributed to each core.
Parameters
----------
grid : numpy.array
2D array
num_cores : int
number of partitions to generate
Returns
-------
list_square_vertex_arrays_per_core : array of arrays
split the list of square vertices
``[[v1,v2,v3,v4],[v1,v2,v3,v4],...,...]`` into roughly equally-sized
sublists to be distributed over the available cores on the system
list_parent_index_values : array of arrays
arrays of `[[row, column], [row, column], ...]`` for each core
current_row : int
last row + 1
current_column : int
last column + 1
Note
----
Limited to 2D for now.
"""
# produce an array containing the cartesian coordinates of all vertices in the grid:
x_array, y_array = grid
grid_vertex_cartesian_array = np.dstack((x_array, y_array))
# the grid_vertex_cartesian_array has N_rows, with each row corresponding to a column of coordinates in the grid (
# so a given row has shape N_rows, 2); overall shape (N_columns_in_grid, N_rows_in_a_column, 2)
# although I'll eventually want a pure numpy/scipy/vector-based solution, for now I'll allow loops to simplify the
# division of the cartesian coordinates into a list of the squares in the grid
list_all_squares_in_grid = (
[]
) # should eventually be a nested list of all the square vertices in the grid/system
list_parent_index_values = (
[]
) # want an ordered list of assignment indices for reconstructing the grid positions
# in the parent process
current_column = 0
while current_column < grid_vertex_cartesian_array.shape[0] - 1:
# go through all the columns except the last one and account for the square vertices (the last column
# has no 'right neighbour')
current_row = 0
while current_row < grid_vertex_cartesian_array.shape[1] - 1:
# all rows except the top row, which doesn't have a row above it for forming squares
bottom_left_vertex_current_square = grid_vertex_cartesian_array[
current_column, current_row
]
bottom_right_vertex_current_square = grid_vertex_cartesian_array[
current_column + 1, current_row
]
top_right_vertex_current_square = grid_vertex_cartesian_array[
current_column + 1, current_row + 1
]
top_left_vertex_current_square = grid_vertex_cartesian_array[
current_column, current_row + 1
]
# append the vertices of this square to the overall list of square vertices:
list_all_squares_in_grid.append(
[
bottom_left_vertex_current_square,
bottom_right_vertex_current_square,
top_right_vertex_current_square,
top_left_vertex_current_square,
]
)
list_parent_index_values.append([current_row, current_column])
current_row += 1
current_column += 1
# split the list of square vertices [[v1,v2,v3,v4],[v1,v2,v3,v4],...,...] into roughly equally-sized sublists to
# be distributed over the available cores on the system:
list_square_vertex_arrays_per_core = np.array_split(
list_all_squares_in_grid, num_cores
)
list_parent_index_values = np.array_split(
list_parent_index_values, num_cores
)
return [
list_square_vertex_arrays_per_core,
list_parent_index_values,
current_row,
current_column,
]
# some private utility functions for trajectory iteration
def _produce_list_indices_point_in_polygon_this_frame(
vertex_coord_list, relevant_particle_coordinate_array_xy
):
"""
Produce a list of indices for particles of interest in the current frame.
The list is ordered according to the order of the squares in the grid.
Each entry in the list is a tuple of indices for the particles in that square.
The list is the same length as the number of squares in the grid.
"""
list_indices_point_in_polygon = []
for square_vertices in vertex_coord_list:
path_object = matplotlib.path.Path(square_vertices)
index_list_in_polygon = np.where(
path_object.contains_points(relevant_particle_coordinate_array_xy)
)
list_indices_point_in_polygon.append(index_list_in_polygon)
return list_indices_point_in_polygon
def _produce_list_centroids_this_frame(
list_indices_in_polygon, relevant_particle_coordinate_array_xy
):
"""
Produce a list of centroids for the particles in the current frame.
The list is ordered according to the order of the squares in the grid.
Each entry in the list is a numpy array of the centroid coordinates for the particles in that square.
The list is the same length as the number of squares in the grid.
If there are no particles in a square, the entry is None.
"""
list_centroids_this_frame = []
for indices in list_indices_in_polygon:
if (
not indices[0].size > 0
): # if there are no particles of interest in this particular square
list_centroids_this_frame.append(None)
else:
current_coordinate_array_in_square = (
relevant_particle_coordinate_array_xy[indices]
)
current_square_indices_centroid = np.average(
current_coordinate_array_in_square, axis=0
)
list_centroids_this_frame.append(current_square_indices_centroid)
return list_centroids_this_frame # a list of numpy xy centroid arrays for this frame
def per_core_work(
topology_file_path,
trajectory_file_path,
list_square_vertex_arrays_this_core,
MDA_selection,
start_frame,
end_frame,
reconstruction_index_list,
maximum_delta_magnitude,
):
"""Run the analysis on one core.
The code to perform on a given core given the list of square vertices assigned to it.
"""
# obtain the relevant coordinates for particles of interest
universe_object = MDAnalysis.Universe(
topology_file_path, trajectory_file_path
)
list_previous_frame_centroids = []
list_previous_frame_indices = []
for ts in universe_object.trajectory:
if ts.frame < start_frame: # don't start until first specified frame
continue
relevant_particle_coordinate_array_xy = universe_object.select_atoms(
MDA_selection
).positions[..., :-1]
# only 2D / xy coords for now
# I will need a list of indices for relevant particles falling within each square in THIS frame:
list_indices_in_squares_this_frame = (
_produce_list_indices_point_in_polygon_this_frame(
list_square_vertex_arrays_this_core,
relevant_particle_coordinate_array_xy,
)
)
# likewise, I will need a list of centroids of particles in each square (same order as above list):
list_centroids_in_squares_this_frame = (
_produce_list_centroids_this_frame(
list_indices_in_squares_this_frame,
relevant_particle_coordinate_array_xy,
)
)
if (
list_previous_frame_indices
): # if the previous frame had indices in at least one square I will need to use
# those indices to generate the updates to the corresponding centroids in this frame:
list_centroids_this_frame_using_indices_from_last_frame = (
_produce_list_centroids_this_frame(
list_previous_frame_indices,
relevant_particle_coordinate_array_xy,
)
)
# I need to write a velocity of zero if there are any 'empty' squares in either frame:
xy_deltas_to_write = []
for square_1_centroid, square_2_centroid in zip(
list_centroids_this_frame_using_indices_from_last_frame,
list_previous_frame_centroids,
):
if square_1_centroid is None or square_2_centroid is None:
xy_deltas_to_write.append([0, 0])
else:
xy_deltas_to_write.append(
np.subtract(
square_1_centroid, square_2_centroid
).tolist()
)
# xy_deltas_to_write = np.subtract(np.array(
# list_centroids_this_frame_using_indices_from_last_frame),np.array(list_previous_frame_centroids))
xy_deltas_to_write = np.array(xy_deltas_to_write)
# now filter the array to only contain distances in the range [-8,8] as a placeholder for dealing with PBC
# issues (Matthieu seemed to use a limit of 8 as well);
xy_deltas_to_write = np.clip(
xy_deltas_to_write,
-maximum_delta_magnitude,
maximum_delta_magnitude,
)
# with the xy and dx,dy values calculated I need to set the values from this frame to previous frame
# values in anticipation of the next frame:
list_previous_frame_centroids = (
list_centroids_in_squares_this_frame[:]
)
list_previous_frame_indices = list_indices_in_squares_this_frame[:]
else: # either no points in squares or after the first frame I'll just reset the 'previous' values so they
# can be used when consecutive frames have proper values
list_previous_frame_centroids = (
list_centroids_in_squares_this_frame[:]
)
list_previous_frame_indices = list_indices_in_squares_this_frame[:]
if ts.frame > end_frame:
break # stop here
return list(zip(reconstruction_index_list, xy_deltas_to_write.tolist()))
def generate_streamlines(
topology_file_path,
trajectory_file_path,
grid_spacing,
MDA_selection,
start_frame,
end_frame,
xmin,
xmax,
ymin,
ymax,
maximum_delta_magnitude,
num_cores="maximum",
):
r"""Produce the x and y components of a 2D streamplot data set.
Parameters
----------
topology_file_path : str
Absolute path to the topology file
trajectory_file_path : str
Absolute path to the trajectory file. It will normally be desirable
to filter the trajectory with a tool such as GROMACS
:program:`g_filter` (see :footcite:p:`Chavent2014`)
grid_spacing : float
The spacing between grid lines (angstroms)
MDA_selection : str
MDAnalysis selection string
start_frame : int
First frame number to parse
end_frame : int
Last frame number to parse
xmin : float
Minimum coordinate boundary for x-axis (angstroms)
xmax : float
Maximum coordinate boundary for x-axis (angstroms)
ymin : float
Minimum coordinate boundary for y-axis (angstroms)
ymax : float
Maximum coordinate boundary for y-axis (angstroms)
maximum_delta_magnitude : float
Absolute value of the largest displacement tolerated for the
centroid of a group of particles ( angstroms). Values above this
displacement will not count in the streamplot (treated as
excessively large displacements crossing the periodic boundary)
num_cores : int or 'maximum' (optional)
The number of cores to use. (Default 'maximum' uses all available
cores)
Returns
-------
dx_array : array of floats
An array object containing the displacements in the x direction
dy_array : array of floats
An array object containing the displacements in the y direction
average_displacement : float
:math:`\frac{\sum\sqrt[]{dx^2 + dy^2}}{N}`
standard_deviation_of_displacement : float
standard deviation of :math:`\sqrt[]{dx^2 + dy^2}`
Examples
--------
Generate 2D streamlines and plot::
import matplotlib, matplotlib.pyplot, np
import MDAnalysis, MDAnalysis.visualization.streamlines
u1, v1, average_displacement, standard_deviation_of_displacement =
MDAnalysis.visualization.streamlines.generate_streamlines('testing.gro', 'testing_filtered.xtc',
grid_spacing=20, MDA_selection='name PO4', start_frame=2, end_frame=3,
xmin=-8.73000049591, xmax= 1225.96008301,
ymin= -12.5799999237, ymax=1224.34008789,
maximum_delta_magnitude=1.0, num_cores=16)
x = np.linspace(0, 1200, 61)
y = np.linspace(0, 1200, 61)
speed = np.sqrt(u1*u1 + v1*v1)
fig = matplotlib.pyplot.figure()
ax = fig.add_subplot(111, aspect='equal')
ax.set_xlabel('x ($\AA$)')
ax.set_ylabel('y ($\AA$)')
ax.streamplot(x, y, u1, v1, density=(10,10), color=speed, linewidth=3*speed/speed.max())
fig.savefig('testing_streamline.png',dpi=300)
.. image:: testing_streamline.png
References
----------
.. footbibliography::
See Also
--------
MDAnalysis.visualization.streamlines_3D.generate_streamlines_3d
"""
# work out the number of cores to use:
if num_cores == "maximum":
num_cores = multiprocessing.cpu_count() # use all available cores
else:
num_cores = num_cores # use the value specified by the user
# assert isinstance(num_cores,(int,long)), "The number of specified cores must (of course) be an integer."
np.seterr(all="warn", over="raise")
parent_list_deltas = [] # collect all data from child processes here
def log_result_to_parent(delta_array):
parent_list_deltas.extend(delta_array)
tuple_of_limits = (xmin, xmax, ymin, ymax)
grid = produce_grid(
tuple_of_limits=tuple_of_limits, grid_spacing=grid_spacing
)
(
list_square_vertex_arrays_per_core,
list_parent_index_values,
total_rows,
total_columns,
) = split_grid(grid=grid, num_cores=num_cores)
pool = multiprocessing.Pool(num_cores)
for vertex_sublist, index_sublist in zip(
list_square_vertex_arrays_per_core, list_parent_index_values
):
pool.apply_async(
per_core_work,
args=(
topology_file_path,
trajectory_file_path,
vertex_sublist,
MDA_selection,
start_frame,
end_frame,
index_sublist,
maximum_delta_magnitude,
),
callback=log_result_to_parent,
)
pool.close()
pool.join()
dx_array = np.zeros((total_rows, total_columns))
dy_array = np.zeros((total_rows, total_columns))
# the parent_list_deltas is shaped like this: [ ([row_index,column_index],[dx,dy]), ... (...),...,]
for (
index_array,
delta_array,
) in (
parent_list_deltas
): # go through the list in the parent process and assign to the
# appropriate positions in the dx and dy matrices:
# build in a filter to replace all values at the cap (currently between -8,8) with 0 to match Matthieu's code
# (I think eventually we'll reduce the cap to a narrower boundary though)
index_1 = index_array.tolist()[0]
index_2 = index_array.tolist()[1]
if abs(delta_array[0]) == maximum_delta_magnitude:
dx_array[index_1, index_2] = 0
else:
dx_array[index_1, index_2] = delta_array[0]
if abs(delta_array[1]) == maximum_delta_magnitude:
dy_array[index_1, index_2] = 0
else:
dy_array[index_1, index_2] = delta_array[1]
# at Matthieu's request, we now want to calculate the average and standard deviation of the displacement values:
displacement_array = np.sqrt(dx_array**2 + dy_array**2)
average_displacement = np.average(displacement_array)
standard_deviation_of_displacement = np.std(displacement_array)
return (
dx_array,
dy_array,
average_displacement,
standard_deviation_of_displacement,
)
|