1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898
|
"""Module to read and write atoms in cif file format.
See http://www.iucr.org/resources/cif/spec/version1.1/cifsyntax for a
description of the file format. STAR extensions as save frames,
global blocks, nested loops and multi-data values are not supported.
The "latin-1" encoding is required by the IUCR specification.
"""
import collections.abc
import io
import re
import shlex
import warnings
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union
import numpy as np
from ase import Atoms
from ase.cell import Cell
from ase.io.cif_unicode import format_unicode, handle_subscripts
from ase.spacegroup import crystal
from ase.spacegroup.spacegroup import Spacegroup, spacegroup_from_data
from ase.utils import iofunction
rhombohedral_spacegroups = {146, 148, 155, 160, 161, 166, 167}
old_spacegroup_names = {'Abm2': 'Aem2',
'Aba2': 'Aea2',
'Cmca': 'Cmce',
'Cmma': 'Cmme',
'Ccca': 'Ccc1'}
# CIF maps names to either single values or to multiple values via loops.
CIFDataValue = Union[str, int, float]
CIFData = Union[CIFDataValue, List[CIFDataValue]]
def convert_value(value: str) -> CIFDataValue:
"""Convert CIF value string to corresponding python type."""
value = value.strip()
if re.match('(".*")|(\'.*\')$', value):
return handle_subscripts(value[1:-1])
elif re.match(r'[+-]?\d+$', value):
return int(value)
elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$', value):
return float(value)
elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\(\d+\)$',
value):
return float(value[:value.index('(')]) # strip off uncertainties
elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\(\d+$',
value):
warnings.warn(f'Badly formed number: "{value}"')
return float(value[:value.index('(')]) # strip off uncertainties
else:
return handle_subscripts(value)
def parse_multiline_string(lines: List[str], line: str) -> str:
"""Parse semicolon-enclosed multiline string and return it."""
assert line[0] == ';'
strings = [line[1:].lstrip()]
while True:
line = lines.pop().strip()
if line[:1] == ';':
break
strings.append(line)
return '\n'.join(strings).strip()
def parse_singletag(lines: List[str], line: str) -> Tuple[str, CIFDataValue]:
"""Parse a CIF tag (entries starting with underscore). Returns
a key-value pair."""
kv = line.split(None, 1)
if len(kv) == 1:
key = line
line = lines.pop().strip()
while not line or line[0] == '#':
line = lines.pop().strip()
if line[0] == ';':
value = parse_multiline_string(lines, line)
else:
value = line
else:
key, value = kv
return key, convert_value(value)
def parse_cif_loop_headers(lines: List[str]) -> Iterator[str]:
while lines:
line = lines.pop()
tokens = line.split()
if len(tokens) == 1 and tokens[0].startswith('_'):
header = tokens[0].lower()
yield header
else:
lines.append(line) # 'undo' pop
return
def parse_cif_loop_data(lines: List[str],
ncolumns: int) -> List[List[CIFDataValue]]:
columns: List[List[CIFDataValue]] = [[] for _ in range(ncolumns)]
tokens = []
while lines:
line = lines.pop().strip()
lowerline = line.lower()
if (not line or
line.startswith('_') or
lowerline.startswith('data_') or
lowerline.startswith('loop_')):
lines.append(line)
break
if line.startswith('#'):
continue
line = line.split(' #')[0]
if line.startswith(';'):
moretokens = [parse_multiline_string(lines, line)]
else:
if ncolumns == 1:
moretokens = [line]
else:
moretokens = shlex.split(line, posix=False)
tokens += moretokens
if len(tokens) < ncolumns:
continue
if len(tokens) == ncolumns:
for i, token in enumerate(tokens):
columns[i].append(convert_value(token))
else:
warnings.warn(f'Wrong number {len(tokens)} of tokens, '
f'expected {ncolumns}: {tokens}')
# (Due to continue statements we cannot move this to start of loop)
tokens = []
if tokens:
assert len(tokens) < ncolumns
raise RuntimeError('CIF loop ended unexpectedly with incomplete row: '
f'{tokens}, expected {ncolumns} tokens')
return columns
def parse_loop(lines: List[str]) -> Dict[str, List[CIFDataValue]]:
"""Parse a CIF loop. Returns a dict with column tag names as keys
and a lists of the column content as values."""
headers = list(parse_cif_loop_headers(lines))
# Dict would be better. But there can be repeated headers.
columns = parse_cif_loop_data(lines, len(headers))
columns_dict = {}
for i, header in enumerate(headers):
if header in columns_dict:
warnings.warn(f'Duplicated loop tags: {header}')
else:
columns_dict[header] = columns[i]
return columns_dict
def parse_items(lines: List[str], line: str) -> Dict[str, CIFData]:
"""Parse a CIF data items and return a dict with all tags."""
tags: Dict[str, CIFData] = {}
while True:
if not lines:
break
line = lines.pop().strip()
if not line:
continue
lowerline = line.lower()
if not line or line.startswith('#'):
continue
elif line.startswith('_'):
key, value = parse_singletag(lines, line)
tags[key.lower()] = value
elif lowerline.startswith('loop_'):
tags.update(parse_loop(lines))
elif lowerline.startswith('data_'):
if line:
lines.append(line)
break
elif line.startswith(';'):
parse_multiline_string(lines, line)
else:
raise ValueError(f'Unexpected CIF file entry: "{line}"')
return tags
class NoStructureData(RuntimeError):
pass
class CIFBlock(collections.abc.Mapping):
"""A block (i.e., a single system) in a crystallographic information file.
Use this object to query CIF tags or import information as ASE objects."""
cell_tags = ['_cell_length_a', '_cell_length_b', '_cell_length_c',
'_cell_angle_alpha', '_cell_angle_beta', '_cell_angle_gamma']
def __init__(self, name: str, tags: Dict[str, CIFData]):
self.name = name
self._tags = tags
def __repr__(self) -> str:
tags = set(self._tags)
return f'CIFBlock({self.name}, tags={tags})'
def __getitem__(self, key: str) -> CIFData:
return self._tags[key]
def __iter__(self) -> Iterator[str]:
return iter(self._tags)
def __len__(self) -> int:
return len(self._tags)
def get(self, key, default=None):
return self._tags.get(key, default)
def get_cellpar(self) -> Optional[List]:
try:
return [self[tag] for tag in self.cell_tags]
except KeyError:
return None
def get_cell(self) -> Cell:
cellpar = self.get_cellpar()
if cellpar is None:
return Cell.new([0, 0, 0])
return Cell.new(cellpar)
def _raw_scaled_positions(self) -> Optional[np.ndarray]:
coords = [self.get(name) for name in ['_atom_site_fract_x',
'_atom_site_fract_y',
'_atom_site_fract_z']]
# XXX Shall we try to handle mixed coordinates?
# (Some scaled vs others fractional)
if None in coords:
return None
return np.array(coords).T
def _raw_positions(self) -> Optional[np.ndarray]:
coords = [self.get('_atom_site_cartn_x'),
self.get('_atom_site_cartn_y'),
self.get('_atom_site_cartn_z')]
if None in coords:
return None
return np.array(coords).T
def _get_site_coordinates(self):
scaled = self._raw_scaled_positions()
if scaled is not None:
return 'scaled', scaled
cartesian = self._raw_positions()
if cartesian is None:
raise NoStructureData('No positions found in structure')
return 'cartesian', cartesian
def _get_symbols_with_deuterium(self):
labels = self._get_any(['_atom_site_type_symbol',
'_atom_site_label'])
if labels is None:
raise NoStructureData('No symbols')
symbols = []
for label in labels:
if label == '.' or label == '?':
raise NoStructureData('Symbols are undetermined')
# Strip off additional labeling on chemical symbols
match = re.search(r'([A-Z][a-z]?)', label)
symbol = match.group(0)
symbols.append(symbol)
return symbols
def get_symbols(self) -> List[str]:
symbols = self._get_symbols_with_deuterium()
return [symbol if symbol != 'D' else 'H' for symbol in symbols]
def _where_deuterium(self):
return np.array([symbol == 'D' for symbol
in self._get_symbols_with_deuterium()], bool)
def _get_masses(self) -> Optional[np.ndarray]:
mask = self._where_deuterium()
if not any(mask):
return None
symbols = self.get_symbols()
masses = Atoms(symbols).get_masses()
masses[mask] = 2.01355
return masses
def _get_any(self, names):
for name in names:
if name in self:
return self[name]
return None
def _get_spacegroup_number(self):
# Symmetry specification, see
# http://www.iucr.org/resources/cif/dictionaries/cif_sym for a
# complete list of official keys. In addition we also try to
# support some commonly used depricated notations
return self._get_any(['_space_group.it_number',
'_space_group_it_number',
'_symmetry_int_tables_number'])
def _get_spacegroup_name(self):
hm_symbol = self._get_any(['_space_group_name_h-m_alt',
'_symmetry_space_group_name_h-m',
'_space_group.Patterson_name_h-m',
'_space_group.patterson_name_h-m'])
hm_symbol = old_spacegroup_names.get(hm_symbol, hm_symbol)
return hm_symbol
def _get_sitesym(self):
sitesym = self._get_any(['_space_group_symop_operation_xyz',
'_space_group_symop.operation_xyz',
'_symmetry_equiv_pos_as_xyz'])
if isinstance(sitesym, str):
sitesym = [sitesym]
return sitesym
def _get_fractional_occupancies(self):
return self.get('_atom_site_occupancy')
def _get_setting(self) -> Optional[int]:
setting_str = self.get('_symmetry_space_group_setting')
if setting_str is None:
return None
setting = int(setting_str)
if setting not in [1, 2]:
raise ValueError(
f'Spacegroup setting must be 1 or 2, not {setting}')
return setting
def get_spacegroup(self, subtrans_included) -> Spacegroup:
# XXX The logic in this method needs serious cleaning up!
no = self._get_spacegroup_number()
if isinstance(no, str):
# If the value was specified as "key 'value'" with ticks,
# then "integer values" become strings and we'll have to
# manually convert it:
no = int(no)
hm_symbol = self._get_spacegroup_name()
sitesym = self._get_sitesym()
if sitesym:
# Special cases: sitesym can be None or an empty list.
# The empty list could be replaced with just the identity
# function, but it seems more correct to try to get the
# spacegroup number and derive the symmetries for that.
subtrans = [(0.0, 0.0, 0.0)] if subtrans_included else None
spacegroup = spacegroup_from_data(
no=no, symbol=hm_symbol, sitesym=sitesym,
subtrans=subtrans,
setting=1) # should the setting be passed from somewhere?
elif no is not None:
spacegroup = no
elif hm_symbol is not None:
spacegroup = hm_symbol
else:
spacegroup = 1
setting_std = self._get_setting()
setting = 1
setting_name = None
if '_symmetry_space_group_setting' in self:
assert setting_std is not None
setting = setting_std
elif '_space_group_crystal_system' in self:
setting_name = self['_space_group_crystal_system']
elif '_symmetry_cell_setting' in self:
setting_name = self['_symmetry_cell_setting']
if setting_name:
no = Spacegroup(spacegroup).no
if no in rhombohedral_spacegroups:
if setting_name == 'hexagonal':
setting = 1
elif setting_name in ('trigonal', 'rhombohedral'):
setting = 2
else:
warnings.warn(
f'unexpected crystal system {setting_name!r} '
f'for space group {spacegroup!r}')
# FIXME - check for more crystal systems...
else:
warnings.warn(
f'crystal system {setting_name!r} is not '
f'interpreted for space group {spacegroup!r}. '
'This may result in wrong setting!')
spg = Spacegroup(spacegroup, setting)
if no is not None:
assert int(spg) == no, (int(spg), no)
return spg
def get_unsymmetrized_structure(self) -> Atoms:
"""Return Atoms without symmetrizing coordinates.
This returns a (normally) unphysical Atoms object
corresponding only to those coordinates included
in the CIF file, useful for e.g. debugging.
This method may change behaviour in the future."""
symbols = self.get_symbols()
coordtype, coords = self._get_site_coordinates()
atoms = Atoms(symbols=symbols,
cell=self.get_cell(),
masses=self._get_masses())
if coordtype == 'scaled':
atoms.set_scaled_positions(coords)
else:
assert coordtype == 'cartesian'
atoms.positions[:] = coords
return atoms
def has_structure(self):
"""Whether this CIF block has an atomic configuration."""
try:
self.get_symbols()
self._get_site_coordinates()
except NoStructureData:
return False
else:
return True
def get_atoms(self, store_tags=False, primitive_cell=False,
subtrans_included=True, fractional_occupancies=True) -> Atoms:
"""Returns an Atoms object from a cif tags dictionary. See read_cif()
for a description of the arguments."""
if primitive_cell and subtrans_included:
raise RuntimeError(
'Primitive cell cannot be determined when sublattice '
'translations are included in the symmetry operations listed '
'in the CIF file, i.e. when `subtrans_included` is True.')
cell = self.get_cell()
assert cell.rank in [0, 3]
kwargs: Dict[str, Any] = {}
if store_tags:
kwargs['info'] = self._tags.copy()
if fractional_occupancies:
occupancies = self._get_fractional_occupancies()
else:
occupancies = None
if occupancies is not None:
# no warnings in this case
kwargs['onduplicates'] = 'keep'
# The unsymmetrized_structure is not the asymmetric unit
# because the asymmetric unit should have (in general) a smaller cell,
# whereas we have the full cell.
unsymmetrized_structure = self.get_unsymmetrized_structure()
if cell.rank == 3:
spacegroup = self.get_spacegroup(subtrans_included)
atoms = crystal(unsymmetrized_structure,
spacegroup=spacegroup,
setting=spacegroup.setting,
occupancies=occupancies,
primitive_cell=primitive_cell,
**kwargs)
else:
atoms = unsymmetrized_structure
if kwargs.get('info') is not None:
atoms.info.update(kwargs['info'])
if occupancies is not None:
occ_dict = {
str(i): {sym: occupancies[i]}
for i, sym in enumerate(atoms.symbols)
}
atoms.info['occupancy'] = occ_dict
return atoms
def parse_block(lines: List[str], line: str) -> CIFBlock:
assert line.lower().startswith('data_')
blockname = line.split('_', 1)[1].rstrip()
tags = parse_items(lines, line)
return CIFBlock(blockname, tags)
def parse_cif(fileobj, reader='ase') -> Iterator[CIFBlock]:
if reader == 'ase':
return parse_cif_ase(fileobj)
elif reader == 'pycodcif':
return parse_cif_pycodcif(fileobj)
else:
raise ValueError(f'No such reader: {reader}')
def parse_cif_ase(fileobj) -> Iterator[CIFBlock]:
"""Parse a CIF file using ase CIF parser."""
if isinstance(fileobj, str):
with open(fileobj, 'rb') as fileobj:
data = fileobj.read()
else:
data = fileobj.read()
if isinstance(data, bytes):
data = data.decode('latin1')
data = format_unicode(data)
lines = [e for e in data.split('\n') if len(e) > 0]
if len(lines) > 0 and lines[0].rstrip() == '#\\#CIF_2.0':
warnings.warn('CIF v2.0 file format detected; `ase` CIF reader might '
'incorrectly interpret some syntax constructions, use '
'`pycodcif` reader instead')
lines = [''] + lines[::-1] # all lines (reversed)
while lines:
line = lines.pop().strip()
if not line or line.startswith('#'):
continue
yield parse_block(lines, line)
def parse_cif_pycodcif(fileobj) -> Iterator[CIFBlock]:
"""Parse a CIF file using pycodcif CIF parser."""
if not isinstance(fileobj, str):
fileobj = fileobj.name
try:
from pycodcif import parse
except ImportError:
raise ImportError(
'parse_cif_pycodcif requires pycodcif ' +
'(http://wiki.crystallography.net/cod-tools/pycodcif/)')
data, _, _ = parse(fileobj)
for datablock in data:
tags = datablock['values']
for tag in tags.keys():
values = [convert_value(x) for x in tags[tag]]
if len(values) == 1:
tags[tag] = values[0]
else:
tags[tag] = values
yield CIFBlock(datablock['name'], tags)
def iread_cif(
fileobj,
index=-1,
store_tags: bool = False,
primitive_cell: bool = False,
subtrans_included: bool = True,
fractional_occupancies: bool = True,
reader: str = 'ase',
) -> Iterator[Atoms]:
# Find all CIF blocks with valid crystal data
# TODO: return Atoms of the block name ``index`` if it is a string.
images = []
for block in parse_cif(fileobj, reader):
if not block.has_structure():
continue
atoms = block.get_atoms(
store_tags, primitive_cell,
subtrans_included,
fractional_occupancies=fractional_occupancies)
images.append(atoms)
if index is None or index == ':':
index = slice(None, None, None)
if not isinstance(index, (slice, str)):
index = slice(index, (index + 1) or None)
for atoms in images[index]:
yield atoms
def read_cif(
fileobj,
index=-1,
*,
store_tags: bool = False,
primitive_cell: bool = False,
subtrans_included: bool = True,
fractional_occupancies: bool = True,
reader: str = 'ase',
) -> Union[Atoms, List[Atoms]]:
"""Read Atoms object from CIF file.
Parameters
----------
store_tags : bool
If true, the *info* attribute of the returned Atoms object will be
populated with all tags in the corresponding cif data block.
primitive_cell : bool
If true, the primitive cell is built instead of the conventional cell.
subtrans_included : bool
If true, sublattice translations are assumed to be included among the
symmetry operations listed in the CIF file (seems to be the common
behaviour of CIF files).
Otherwise the sublattice translations are determined from setting 1 of
the extracted space group. A result of setting this flag to true, is
that it will not be possible to determine the primitive cell.
fractional_occupancies : bool
If true, the resulting atoms object will be tagged equipped with a
dictionary `occupancy`. The keys of this dictionary will be integers
converted to strings. The conversion to string is done in order to
avoid troubles with JSON encoding/decoding of the dictionaries with
non-string keys.
Also, in case of mixed occupancies, the atom's chemical symbol will be
that of the most dominant species.
reader : str
Select CIF reader.
* ``ase`` : built-in CIF reader (default)
* ``pycodcif`` : CIF reader based on ``pycodcif`` package
Notes
-----
Only blocks with valid crystal data will be included.
"""
g = iread_cif(
fileobj,
index,
store_tags,
primitive_cell,
subtrans_included,
fractional_occupancies,
reader,
)
if isinstance(index, (slice, str)):
# Return list of atoms
return list(g)
else:
# Return single atoms object
return next(g)
def format_cell(cell: Cell) -> str:
assert cell.rank == 3
lines = []
for name, value in zip(CIFBlock.cell_tags, cell.cellpar()):
line = f'{name:20} {value}\n'
lines.append(line)
assert len(lines) == 6
return ''.join(lines)
def format_generic_spacegroup_info() -> str:
# We assume no symmetry whatsoever
return '\n'.join([
'_space_group_name_H-M_alt "P 1"',
'_space_group_IT_number 1',
'',
'loop_',
' _space_group_symop_operation_xyz',
" 'x, y, z'",
'',
])
class CIFLoop:
def __init__(self):
self.names = []
self.formats = []
self.arrays = []
def add(self, name, array, fmt):
assert name.startswith('_')
self.names.append(name)
self.formats.append(fmt)
self.arrays.append(array)
if len(self.arrays[0]) != len(self.arrays[-1]):
raise ValueError(f'Loop data "{name}" has {len(array)} '
'elements, expected {len(self.arrays[0])}')
def tostring(self):
lines = []
append = lines.append
append('loop_')
for name in self.names:
append(f' {name}')
template = ' ' + ' '.join(self.formats)
ncolumns = len(self.arrays)
nrows = len(self.arrays[0]) if ncolumns > 0 else 0
for row in range(nrows):
arraydata = [array[row] for array in self.arrays]
line = template.format(*arraydata)
append(line)
append('')
return '\n'.join(lines)
@iofunction('wb')
def write_cif(fd, images, cif_format=None,
wrap=True, labels=None, loop_keys=None) -> None:
r"""Write *images* to CIF file.
wrap: bool
Wrap atoms into unit cell.
labels: list
Use this list (shaped list[i_frame][i_atom] = string) for the
'_atom_site_label' section instead of automatically generating
it from the element symbol.
loop_keys: dict
Add the information from this dictionary to the `loop\_`
section. Keys are printed to the `loop\_` section preceeded by
' _'. dict[key] should contain the data printed for each atom,
so it needs to have the setup `dict[key][i_frame][i_atom] =
string`. The strings are printed as they are, so take care of
formating. Information can be re-read using the `store_tags`
option of the cif reader.
"""
if cif_format is not None:
warnings.warn('The cif_format argument is deprecated and may be '
'removed in the future. Use loop_keys to customize '
'data written in loop.', FutureWarning)
if loop_keys is None:
loop_keys = {}
if hasattr(images, 'get_positions'):
images = [images]
fd = io.TextIOWrapper(fd, encoding='latin-1')
try:
for i, atoms in enumerate(images):
blockname = f'data_image{i}\n'
image_loop_keys = {key: loop_keys[key][i] for key in loop_keys}
write_cif_image(blockname, atoms, fd,
wrap=wrap,
labels=None if labels is None else labels[i],
loop_keys=image_loop_keys)
finally:
# Using the TextIOWrapper somehow causes the file to close
# when this function returns.
# Detach in order to circumvent this highly illogical problem:
fd.detach()
def autolabel(symbols: Sequence[str]) -> List[str]:
no: Dict[str, int] = {}
labels = []
for symbol in symbols:
if symbol in no:
no[symbol] += 1
else:
no[symbol] = 1
labels.append('%s%d' % (symbol, no[symbol]))
return labels
def chemical_formula_header(atoms):
counts = atoms.symbols.formula.count()
formula_sum = ' '.join(f'{sym}{count}' for sym, count
in counts.items())
return (f'_chemical_formula_structural {atoms.symbols}\n'
f'_chemical_formula_sum "{formula_sum}"\n')
class BadOccupancies(ValueError):
pass
def expand_kinds(atoms, coords):
# try to fetch occupancies // spacegroup_kinds - occupancy mapping
symbols = list(atoms.symbols)
coords = list(coords)
occupancies = [1] * len(symbols)
occ_info = atoms.info.get('occupancy')
kinds = atoms.arrays.get('spacegroup_kinds')
if occ_info is not None and kinds is not None:
for i, kind in enumerate(kinds):
occ_info_kind = occ_info[str(kind)]
symbol = symbols[i]
if symbol not in occ_info_kind:
raise BadOccupancies('Occupancies present but no occupancy '
'info for "{symbol}"')
occupancies[i] = occ_info_kind[symbol]
# extend the positions array in case of mixed occupancy
for sym, occ in occ_info[str(kind)].items():
if sym != symbols[i]:
symbols.append(sym)
coords.append(coords[i])
occupancies.append(occ)
return symbols, coords, occupancies
def atoms_to_loop_data(atoms, wrap, labels, loop_keys):
if atoms.cell.rank == 3:
coord_type = 'fract'
coords = atoms.get_scaled_positions(wrap).tolist()
else:
coord_type = 'Cartn'
coords = atoms.get_positions(wrap).tolist()
try:
symbols, coords, occupancies = expand_kinds(atoms, coords)
except BadOccupancies as err:
warnings.warn(str(err))
occupancies = [1] * len(atoms)
symbols = list(atoms.symbols)
if labels is None:
labels = autolabel(symbols)
coord_headers = [f'_atom_site_{coord_type}_{axisname}'
for axisname in 'xyz']
loopdata = {}
loopdata['_atom_site_label'] = (labels, '{:<8s}')
loopdata['_atom_site_occupancy'] = (occupancies, '{:6.4f}')
_coords = np.array(coords)
for i, key in enumerate(coord_headers):
loopdata[key] = (_coords[:, i], '{}')
loopdata['_atom_site_type_symbol'] = (symbols, '{:<2s}')
loopdata['_atom_site_symmetry_multiplicity'] = (
[1.0] * len(symbols), '{}')
for key in loop_keys:
# Should expand the loop_keys like we expand the occupancy stuff.
# Otherwise user will never figure out how to do this.
values = [loop_keys[key][i] for i in range(len(symbols))]
loopdata['_' + key] = (values, '{}')
return loopdata, coord_headers
def write_cif_image(blockname, atoms, fd, *, wrap,
labels, loop_keys):
fd.write(blockname)
fd.write(chemical_formula_header(atoms))
rank = atoms.cell.rank
if rank == 3:
fd.write(format_cell(atoms.cell))
fd.write('\n')
fd.write(format_generic_spacegroup_info())
fd.write('\n')
elif rank != 0:
raise ValueError('CIF format can only represent systems with '
f'0 or 3 lattice vectors. Got {rank}.')
loopdata, coord_headers = atoms_to_loop_data(atoms, wrap, labels,
loop_keys)
headers = [
'_atom_site_type_symbol',
'_atom_site_label',
'_atom_site_symmetry_multiplicity',
*coord_headers,
'_atom_site_occupancy',
]
headers += ['_' + key for key in loop_keys]
loop = CIFLoop()
for header in headers:
array, fmt = loopdata[header]
loop.add(header, array, fmt)
fd.write(loop.tostring())
|