File: test_cif.py

package info (click to toggle)
python-ase 3.21.1-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 13,936 kB
  • sloc: python: 122,428; xml: 946; makefile: 111; javascript: 47
file content (536 lines) | stat: -rw-r--r-- 14,241 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
import io
import numpy as np
import warnings
import pytest

from ase import Atoms
from ase.build import molecule
from ase.io import read, write
from ase.io.cif import CIFLoop, parse_loop, NoStructureData, parse_cif
from ase.calculators.calculator import compare_atoms


def check_fractional_occupancies(atoms):
    """ Checks fractional occupancy entries in atoms.info dict """
    assert atoms.info['occupancy']
    assert list(atoms.arrays['spacegroup_kinds'])

    occupancies = atoms.info['occupancy']
    kinds = atoms.arrays['spacegroup_kinds']
    for a in atoms:
        if a.symbol == 'Na':
            assert len(occupancies[kinds[a.index]]) == 2
            assert occupancies[kinds[a.index]]['K'] == 0.25
            assert occupancies[kinds[a.index]]['Na'] == 0.75
        else:
            assert len(occupancies[kinds[a.index]]) == 1
        if a.symbol == 'Cl':
            assert occupancies[kinds[a.index]]['Cl'] == 0.3


content = """
data_1


_chemical_name_common                  'Mysterious something'
_cell_length_a                         5.50000
_cell_length_b                         5.50000
_cell_length_c                         5.50000
_cell_angle_alpha                      90
_cell_angle_beta                       90
_cell_angle_gamma                      90
_space_group_name_H-M_alt              'F m -3 m'
_space_group_IT_number                 225

loop_
_space_group_symop_operation_xyz
   'x, y, z'
   '-x, -y, -z'
   '-x, -y, z'
   'x, y, -z'
   '-x, y, -z'
   'x, -y, z'
   'x, -y, -z'
   '-x, y, z'
   'z, x, y'
   '-z, -x, -y'
   'z, -x, -y'
   '-z, x, y'
   '-z, -x, y'
   'z, x, -y'
   '-z, x, -y'
   'z, -x, y'
   'y, z, x'
   '-y, -z, -x'
   '-y, z, -x'
   'y, -z, x'
   'y, -z, -x'
   '-y, z, x'
   '-y, -z, x'
   'y, z, -x'
   'y, x, -z'
   '-y, -x, z'
   '-y, -x, -z'
   'y, x, z'
   'y, -x, z'
   '-y, x, -z'
   '-y, x, z'
   'y, -x, -z'
   'x, z, -y'
   '-x, -z, y'
   '-x, z, y'
   'x, -z, -y'
   '-x, -z, -y'
   'x, z, y'
   'x, -z, y'
   '-x, z, -y'
   'z, y, -x'
   '-z, -y, x'
   'z, -y, x'
   '-z, y, -x'
   '-z, y, x'
   'z, -y, -x'
   '-z, -y, -x'
   'z, y, x'
   'x, y+1/2, z+1/2'
   '-x, -y+1/2, -z+1/2'
   '-x, -y+1/2, z+1/2'
   'x, y+1/2, -z+1/2'
   '-x, y+1/2, -z+1/2'
   'x, -y+1/2, z+1/2'
   'x, -y+1/2, -z+1/2'
   '-x, y+1/2, z+1/2'
   'z, x+1/2, y+1/2'
   '-z, -x+1/2, -y+1/2'
   'z, -x+1/2, -y+1/2'
   '-z, x+1/2, y+1/2'
   '-z, -x+1/2, y+1/2'
   'z, x+1/2, -y+1/2'
   '-z, x+1/2, -y+1/2'
   'z, -x+1/2, y+1/2'
   'y, z+1/2, x+1/2'
   '-y, -z+1/2, -x+1/2'
   '-y, z+1/2, -x+1/2'
   'y, -z+1/2, x+1/2'
   'y, -z+1/2, -x+1/2'
   '-y, z+1/2, x+1/2'
   '-y, -z+1/2, x+1/2'
   'y, z+1/2, -x+1/2'
   'y, x+1/2, -z+1/2'
   '-y, -x+1/2, z+1/2'
   '-y, -x+1/2, -z+1/2'
   'y, x+1/2, z+1/2'
   'y, -x+1/2, z+1/2'
   '-y, x+1/2, -z+1/2'
   '-y, x+1/2, z+1/2'
   'y, -x+1/2, -z+1/2'
   'x, z+1/2, -y+1/2'
   '-x, -z+1/2, y+1/2'
   '-x, z+1/2, y+1/2'
   'x, -z+1/2, -y+1/2'
   '-x, -z+1/2, -y+1/2'
   'x, z+1/2, y+1/2'
   'x, -z+1/2, y+1/2'
   '-x, z+1/2, -y+1/2'
   'z, y+1/2, -x+1/2'
   '-z, -y+1/2, x+1/2'
   'z, -y+1/2, x+1/2'
   '-z, y+1/2, -x+1/2'
   '-z, y+1/2, x+1/2'
   'z, -y+1/2, -x+1/2'
   '-z, -y+1/2, -x+1/2'
   'z, y+1/2, x+1/2'
   'x+1/2, y, z+1/2'
   '-x+1/2, -y, -z+1/2'
   '-x+1/2, -y, z+1/2'
   'x+1/2, y, -z+1/2'
   '-x+1/2, y, -z+1/2'
   'x+1/2, -y, z+1/2'
   'x+1/2, -y, -z+1/2'
   '-x+1/2, y, z+1/2'
   'z+1/2, x, y+1/2'
   '-z+1/2, -x, -y+1/2'
   'z+1/2, -x, -y+1/2'
   '-z+1/2, x, y+1/2'
   '-z+1/2, -x, y+1/2'
   'z+1/2, x, -y+1/2'
   '-z+1/2, x, -y+1/2'
   'z+1/2, -x, y+1/2'
   'y+1/2, z, x+1/2'
   '-y+1/2, -z, -x+1/2'
   '-y+1/2, z, -x+1/2'
   'y+1/2, -z, x+1/2'
   'y+1/2, -z, -x+1/2'
   '-y+1/2, z, x+1/2'
   '-y+1/2, -z, x+1/2'
   'y+1/2, z, -x+1/2'
   'y+1/2, x, -z+1/2'
   '-y+1/2, -x, z+1/2'
   '-y+1/2, -x, -z+1/2'
   'y+1/2, x, z+1/2'
   'y+1/2, -x, z+1/2'
   '-y+1/2, x, -z+1/2'
   '-y+1/2, x, z+1/2'
   'y+1/2, -x, -z+1/2'
   'x+1/2, z, -y+1/2'
   '-x+1/2, -z, y+1/2'
   '-x+1/2, z, y+1/2'
   'x+1/2, -z, -y+1/2'
   '-x+1/2, -z, -y+1/2'
   'x+1/2, z, y+1/2'
   'x+1/2, -z, y+1/2'
   '-x+1/2, z, -y+1/2'
   'z+1/2, y, -x+1/2'
   '-z+1/2, -y, x+1/2'
   'z+1/2, -y, x+1/2'
   '-z+1/2, y, -x+1/2'
   '-z+1/2, y, x+1/2'
   'z+1/2, -y, -x+1/2'
   '-z+1/2, -y, -x+1/2'
   'z+1/2, y, x+1/2'
   'x+1/2, y+1/2, z'
   '-x+1/2, -y+1/2, -z'
   '-x+1/2, -y+1/2, z'
   'x+1/2, y+1/2, -z'
   '-x+1/2, y+1/2, -z'
   'x+1/2, -y+1/2, z'
   'x+1/2, -y+1/2, -z'
   '-x+1/2, y+1/2, z'
   'z+1/2, x+1/2, y'
   '-z+1/2, -x+1/2, -y'
   'z+1/2, -x+1/2, -y'
   '-z+1/2, x+1/2, y'
   '-z+1/2, -x+1/2, y'
   'z+1/2, x+1/2, -y'
   '-z+1/2, x+1/2, -y'
   'z+1/2, -x+1/2, y'
   'y+1/2, z+1/2, x'
   '-y+1/2, -z+1/2, -x'
   '-y+1/2, z+1/2, -x'
   'y+1/2, -z+1/2, x'
   'y+1/2, -z+1/2, -x'
   '-y+1/2, z+1/2, x'
   '-y+1/2, -z+1/2, x'
   'y+1/2, z+1/2, -x'
   'y+1/2, x+1/2, -z'
   '-y+1/2, -x+1/2, z'
   '-y+1/2, -x+1/2, -z'
   'y+1/2, x+1/2, z'
   'y+1/2, -x+1/2, z'
   '-y+1/2, x+1/2, -z'
   '-y+1/2, x+1/2, z'
   'y+1/2, -x+1/2, -z'
   'x+1/2, z+1/2, -y'
   '-x+1/2, -z+1/2, y'
   '-x+1/2, z+1/2, y'
   'x+1/2, -z+1/2, -y'
   '-x+1/2, -z+1/2, -y'
   'x+1/2, z+1/2, y'
   'x+1/2, -z+1/2, y'
   '-x+1/2, z+1/2, -y'
   'z+1/2, y+1/2, -x'
   '-z+1/2, -y+1/2, x'
   'z+1/2, -y+1/2, x'
   '-z+1/2, y+1/2, -x'
   '-z+1/2, y+1/2, x'
   'z+1/2, -y+1/2, -x'
   '-z+1/2, -y+1/2, -x'
   'z+1/2, y+1/2, x'

loop_
   _atom_site_label
   _atom_site_occupancy
   _atom_site_fract_x
   _atom_site_fract_y
   _atom_site_fract_z
   _atom_site_adp_type
   _atom_site_B_iso_or_equiv
   _atom_site_type_symbol
   Na         0.7500  0.000000      0.000000      0.000000     Biso  1.000000 Na
   K          0.2500  0.000000      0.000000      0.000000     Biso  1.000000 K
   Cl         0.3000  0.500000      0.500000      0.500000     Biso  1.000000 Cl
   I          0.5000  0.250000      0.250000      0.250000     Biso  1.000000 I
"""


def test_cif():
    cif_file = io.StringIO(content)

    # legacy behavior is to not read the K atoms
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        atoms_leg = read(cif_file, format='cif', fractional_occupancies=False)
    elements = np.unique(atoms_leg.get_atomic_numbers())
    for n in (11, 17, 53):
        assert n in elements
    try:
        atoms_leg.info['occupancy']
        raise AssertionError
    except KeyError:
        pass

    cif_file = io.StringIO(content)
    # new behavior is to still not read the K atoms, but build info
    atoms = read(cif_file, format='cif', fractional_occupancies=True)

    # yield the same old atoms for fractional_occupancies case
    assert len(atoms_leg) == len(atoms)
    assert np.all(atoms_leg.get_atomic_numbers() == atoms.get_atomic_numbers())
    assert atoms_leg == atoms

    elements = np.unique(atoms_leg.get_atomic_numbers())
    for n in (11, 17, 53):
        assert n in elements

    check_fractional_occupancies(atoms)

    # read/write
    fname = 'testfile.cif'
    with open(fname, 'wb') as fd:
        write(fd, atoms, format='cif')

    with open(fname) as fd:
        atoms = read(fd, format='cif', fractional_occupancies=True)

    check_fractional_occupancies(atoms)

    # check repeating atoms
    atoms = atoms.repeat([2, 1, 1])
    assert len(atoms.arrays['spacegroup_kinds']) == len(atoms.arrays['numbers'])


# ICSD-like file from issue #293
content2 = """
data_global
_cell_length_a 9.378(5)
_cell_length_b 7.488(5)
_cell_length_c 6.513(5)
_cell_angle_alpha 90.
_cell_angle_beta 91.15(5)
_cell_angle_gamma 90.
_cell_volume 457.27
_cell_formula_units_Z 2
_symmetry_space_group_name_H-M 'P 1 n 1'
_symmetry_Int_Tables_number 7
_refine_ls_R_factor_all 0.071
loop_
_symmetry_equiv_pos_site_id
_symmetry_equiv_pos_as_xyz
1 'x+1/2, -y, z+1/2'
2 'x, y, z'
loop_
_atom_type_symbol
_atom_type_oxidation_number
Sn2+ 2
As4+ 4
Se2- -2
loop_
_atom_site_label
_atom_site_type_symbol
_atom_site_symmetry_multiplicity
_atom_site_Wyckoff_symbol
_atom_site_fract_x
_atom_site_fract_y
_atom_site_fract_z
_atom_site_B_iso_or_equiv
_atom_site_occupancy
_atom_site_attached_hydrogens
Sn1 Sn2+ 2 a 0.5270(2) 0.3856(2) 0.7224(3) 0.0266(4) 1. 0
Sn2 Sn2+ 2 a 0.0279(2) 0.1245(2) 0.7870(2) 0.0209(4) 1. 0
As1 As4+ 2 a 0.6836(4) 0.1608(5) 0.8108(6) 0.0067(7) 1. 0
As2 As4+ 2 a 0.8174(4) 0.6447(6) 0.1908(6) 0.0057(6) 1. 0
Se1 Se2- 2 a 0.4898(4) 0.7511(6) 0.8491(6) 0.0110(6) 1. 0
Se2 Se2- 2 a 0.7788(4) 0.6462(6) 0.2750(6) 0.0097(6) 1. 0
Se3 Se2- 2 a 0.6942(4) 0.0517(5) 0.5921(6) 0.2095(6) 1. 0
Se4 Se2- 2 a 0.0149(4) 0.3437(6) 0.5497(7) 0.1123(7) 1. 0
Se5 Se2- 2 a 0.1147(4) 0.5633(4) 0.3288(6) 0.1078(6) 1. 0
Se6 Se2- 2 a 0.0050(4) 0.4480(6) 0.9025(6) 0.9102(6) 1. 0
"""


def test_cif_icsd():
    cif_file = io.StringIO(content2)
    atoms = read(cif_file, format='cif')
    # test something random so atoms is not unused
    assert 'occupancy' in atoms.info


@pytest.fixture
def cif_atoms():
    cif_file = io.StringIO(content)
    return read(cif_file, format='cif')


def test_cif_loop_keys(cif_atoms):
    data = {}
    # test case has 20 entries
    data['someKey'] = [[str(i) + "test" for i in range(20)]]
    # test case has 20 entries
    data['someIntKey'] = [[str(i) + "123" for i in range(20)]]
    cif_atoms.write('testfile.cif', loop_keys=data)

    atoms1 = read('testfile.cif', store_tags=True)
    # keys are read lowercase only
    r_data = {'someKey': atoms1.info['_somekey'],
              'someIntKey': atoms1.info['_someintkey']}
    assert r_data['someKey'] == data['someKey'][0]
    # data reading auto converts strins
    assert r_data['someIntKey'] == [int(x) for x in data['someIntKey'][0]]


# test if automatic numbers written after elements are correct
def test_cif_writer_label_numbers(cif_atoms):
    cif_atoms.write('testfile.cif')
    atoms1 = read('testfile.cif', store_tags=True)
    labels = atoms1.info['_atom_site_label']
    # cannot use atoms.symbols as K is missing there
    elements = atoms1.info['_atom_site_type_symbol']
    build_labels = [
        "{:}{:}".format(
            x, i) for x in set(elements) for i in range(
            1, elements.count(x) + 1)]
    assert build_labels.sort() == labels.sort()


def test_cif_labels(cif_atoms):
    data = [["label" + str(i) for i in range(20)]]  # test case has 20 entries
    cif_atoms.write('testfile.cif', labels=data)

    atoms1 = read('testfile.cif', store_tags=True)
    print(atoms1.info)
    assert data[0] == atoms1.info['_atom_site_label']


def test_cifloop():
    dct = {'_eggs': range(4),
           '_potatoes': [1.3, 7.1, -1, 0]}

    loop = CIFLoop()
    loop.add('_eggs', dct['_eggs'], '{:<2d}')
    loop.add('_potatoes', dct['_potatoes'], '{:.4f}')

    string = loop.tostring() + '\n\n'
    lines = string.splitlines()[::-1]
    assert lines.pop() == 'loop_'

    newdct = parse_loop(lines)
    print(newdct)
    assert set(dct) == set(newdct)
    for name in dct:
        assert dct[name] == pytest.approx(newdct[name])


@pytest.mark.parametrize('data', [b'', b'data_dummy'])
def test_empty_or_atomless(data):
    ciffile = io.BytesIO(data)

    images = read(ciffile, index=':', format='cif')
    assert len(images) == 0


def test_empty_or_atomless_cifblock():
    ciffile = io.BytesIO(b'data_dummy')
    blocks = list(parse_cif(ciffile))

    assert len(blocks) == 1
    assert not blocks[0].has_structure()
    with pytest.raises(NoStructureData):
        blocks[0].get_atoms()


def test_symbols_questionmark():
    ciffile = io.BytesIO(
        b'data_dummy\n'
        b'loop_\n'
        b'_atom_site_label\n'
        b'?\n')
    blocks = list(parse_cif(ciffile))
    assert not blocks[0].has_structure()
    with pytest.raises(NoStructureData, match='undetermined'):
        blocks[0].get_atoms()


def test_bad_occupancies(cif_atoms):
    assert 'Au' not in cif_atoms.symbols
    cif_atoms.symbols[0] = 'Au'
    with pytest.warns(UserWarning, match='no occupancy info'):
        write('tmp.cif', cif_atoms)


@pytest.mark.parametrize(
    'setting_name, ref_setting',
    [
        ('hexagonal', 1),
        ('trigonal', 2),
        ('rhombohedral', 2)
    ]
)
def test_spacegroup_named_setting(setting_name, ref_setting):
    """The rhombohedral crystal system signifies setting=2"""
    ciffile = io.BytesIO("""\
data_test
_space_group_crystal_system {}
_symmetry_space_group_name_H-M         'R-3m'
""".format(setting_name).encode('ascii'))

    blocks = list(parse_cif(ciffile))
    assert len(blocks) == 1
    spg = blocks[0].get_spacegroup(False)
    assert int(spg) == 166
    assert spg.setting == ref_setting


@pytest.fixture
def atoms():
    return Atoms('CO', cell=[2., 3., 4., 50., 60., 70.], pbc=True,
                 scaled_positions=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])


def roundtrip(atoms):
    from ase.io.bytes import to_bytes, parse_atoms
    buf = to_bytes(atoms, format='cif')
    return parse_atoms(buf, format='cif')


def test_cif_roundtrip_periodic(atoms):
    # Reading and writing the cell loses the rotation information,
    # but preserves cellpar and scaled positions.
    atoms1 = roundtrip(atoms)

    assert str(atoms1.symbols) == 'CO'
    assert all(atoms1.pbc)
    assert atoms.cell.cellpar() == pytest.approx(
        atoms1.cell.cellpar(), abs=1e-5)
    assert atoms.get_scaled_positions() == pytest.approx(
        atoms1.get_scaled_positions(), abs=1e-5)


def test_cif_roundtrip_nonperiodic():
    atoms = molecule('H2O')
    atoms1 = roundtrip(atoms)
    assert not compare_atoms(atoms, atoms1, tol=1e-5)


def test_cif_missingvector(atoms):
    # We don't know any way to represent only 2 cell vectors in CIF.
    # So we discard them and warn the user.
    atoms.cell[0] = 0.0
    atoms.pbc[0] = False

    assert atoms.cell.rank == 2

    with pytest.raises(ValueError, match='CIF format can only'):
        roundtrip(atoms)


def test_cif_roundtrip_mixed():
    atoms = Atoms('Au', cell=[1., 2., 3.], pbc=[1, 1, 0])
    atoms1 = roundtrip(atoms)

    # We cannot preserve PBC info for this case:
    assert all(atoms1.pbc)
    assert compare_atoms(atoms, atoms1, tol=1e-5) == ['pbc']
    assert atoms.get_scaled_positions() == pytest.approx(
        atoms1.get_scaled_positions(), abs=1e-5)
    #assert pytest.approx(atoms.positions) == atoms1.positions
    #assert atoms1.cell.rank == 0