File: castep_reader.py

package info (click to toggle)
python-ase 3.26.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 15,484 kB
  • sloc: python: 148,112; xml: 2,728; makefile: 110; javascript: 47
file content (625 lines) | stat: -rw-r--r-- 22,425 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
# fmt: off

import io
import re
import warnings
from collections import defaultdict
from typing import Any, Dict, List, Optional

import numpy as np

from ase import Atoms, units
from ase.calculators.singlepoint import SinglePointCalculator
from ase.constraints import FixAtoms, FixCartesian, FixConstraint
from ase.io import ParseError
from ase.utils import reader, string2index


@reader
def read_castep_castep(fd, index=-1):
    """Read a .castep file and returns an Atoms object.

    The calculator information will be stored in the calc attribute.

    Notes
    -----
    This routine will return an atom ordering as found within the castep file.
    This means that the species will be ordered by ascending atomic numbers.
    The atoms witin a species are ordered as given in the original cell file.

    """
    # look for last result, if several CASTEP run are appended
    line_start, line_end, end_found = _find_last_record(fd)
    if not end_found:
        raise ParseError(f'No regular end found in {fd.name} file.')

    # These variables are finally assigned to `SinglePointCalculator`
    # for backward compatibility with the `Castep` calculator.
    cut_off_energy = None
    kpoints = None
    total_time = None
    peak_memory = None

    # jump back to the beginning to the last record
    fd.seek(0)
    for i, line in enumerate(fd):
        if i == line_start:
            break

    # read header
    parameters_header = _read_header(fd)
    if 'cut_off_energy' in parameters_header:
        cut_off_energy = parameters_header['cut_off_energy']
        if 'basis_precision' in parameters_header:
            del parameters_header['cut_off_energy']  # avoid conflict

    markers_new_iteration = [
        'BFGS: starting iteration',
        'BFGS: improving iteration',
        'Starting MD iteration',
    ]

    images = []

    results = {}
    constraints = []
    species_pot = []
    castep_warnings = []
    for i, line in enumerate(fd):

        if i > line_end:
            break

        if 'Number of kpoints used' in line:
            kpoints = int(line.split('=')[-1].strip())
        elif 'Unit Cell' in line:
            lattice_real = _read_unit_cell(fd)
        elif 'Cell Contents' in line:
            for line in fd:
                if 'Total number of ions in cell' in line:
                    n_atoms = int(line.split()[7])
                if 'Total number of species in cell' in line:
                    int(line.split()[7])
                fields = line.split()
                if len(fields) == 0:
                    break
        elif 'Fractional coordinates of atoms' in line:
            species, custom_species, positions_frac = \
                _read_fractional_coordinates(fd, n_atoms)
        elif 'Files used for pseudopotentials' in line:
            for line in fd:
                line = fd.readline()
                if 'Pseudopotential generated on-the-fly' in line:
                    continue
                fields = line.split()
                if len(fields) == 2:
                    species_pot.append(fields)
                else:
                    break
        elif 'k-Points For BZ Sampling' in line:
            # TODO: generalize for non-Monkhorst Pack case
            # (i.e. kpoint lists) -
            # kpoints_offset cannot be read this way and
            # is hence always set to None
            for line in fd:
                if not line.strip():
                    break
                if 'MP grid size for SCF calculation' in line:
                    # kpoints =  ' '.join(line.split()[-3:])
                    # self.kpoints_mp_grid = kpoints
                    # self.kpoints_mp_offset = '0. 0. 0.'
                    # not set here anymore because otherwise
                    # two calculator objects go out of sync
                    # after each calculation triggering unnecessary
                    # recalculation
                    break

        elif 'Final energy' in line:
            key = 'energy_without_dispersion_correction'
            results[key] = float(line.split()[-2])
        elif 'Final free energy' in line:
            key = 'free_energy_without_dispersion_correction'
            results[key] = float(line.split()[-2])
        elif 'NB est. 0K energy' in line:
            key = 'energy_zero_without_dispersion_correction'
            results[key] = float(line.split()[-2])

        # Add support for dispersion correction
        # filtering due to SEDC is done in get_potential_energy
        elif 'Dispersion corrected final energy' in line:
            key = 'energy_with_dispersion_correlation'
            results[key] = float(line.split()[-2])
        elif 'Dispersion corrected final free energy' in line:
            key = 'free_energy_with_dispersion_correlation'
            results[key] = float(line.split()[-2])
        elif 'NB dispersion corrected est. 0K energy' in line:
            key = 'energy_zero_with_dispersion_correlation'
            results[key] = float(line.split()[-2])

        # check if we had a finite basis set correction
        elif 'Total energy corrected for finite basis set' in line:
            key = 'energy_with_finite_basis_set_correction'
            results[key] = float(line.split()[-2])

        # ******************** Forces *********************
        # ************** Symmetrised Forces ***************
        # ******************** Constrained Forces ********************
        # ******************* Unconstrained Forces *******************
        elif re.search(r'\**.* Forces \**', line):
            forces, constraints = _read_forces(fd, n_atoms)
            results['forces'] = np.array(forces)

        # ***************** Stress Tensor *****************
        # *********** Symmetrised Stress Tensor ***********
        elif re.search(r'\**.* Stress Tensor \**', line):
            results.update(_read_stress(fd))

        elif any(_ in line for _ in markers_new_iteration):
            _add_atoms(
                images,
                lattice_real,
                species,
                custom_species,
                positions_frac,
                constraints,
                results,
            )
            # reset for the next step
            lattice_real = None
            species = None
            positions_frac = None
            results = {}
            constraints = []

        # extract info from the Mulliken analysis
        elif 'Atomic Populations' in line:
            results.update(_read_mulliken_charges(fd))

        # extract detailed Hirshfeld analysis (iprint > 1)
        elif 'Hirshfeld total electronic charge (e)' in line:
            results.update(_read_hirshfeld_details(fd, n_atoms))

        elif 'Hirshfeld Analysis' in line:
            results.update(_read_hirshfeld_charges(fd))

        # There is actually no good reason to get out of the loop
        # already at this point... or do I miss something?
        # elif 'BFGS: Final Configuration:' in line:
        #    break
        elif 'warn' in line.lower():
            castep_warnings.append(line)

        # fetch some last info
        elif 'Total time' in line:
            pattern = r'.*=\s*([\d\.]+) s'
            total_time = float(re.search(pattern, line).group(1))

        elif 'Peak Memory Use' in line:
            pattern = r'.*=\s*([\d]+) kB'
            peak_memory = int(re.search(pattern, line).group(1))

    # add the last image
    _add_atoms(
        images,
        lattice_real,
        species,
        custom_species,
        positions_frac,
        constraints,
        results,
    )

    for atoms in images:
        # these variables are temporarily assigned to `SinglePointCalculator`
        # to be assigned to the `Castep` calculator for backward compatibility
        atoms.calc._cut_off_energy = cut_off_energy
        atoms.calc._kpoints = kpoints
        atoms.calc._species_pot = species_pot
        atoms.calc._total_time = total_time
        atoms.calc._peak_memory = peak_memory
        atoms.calc._parameters_header = parameters_header

    if castep_warnings:
        warnings.warn('WARNING: .castep file contains warnings')
        for warning in castep_warnings:
            warnings.warn(warning)

    if isinstance(index, str):
        index = string2index(index)

    return images[index]


def _find_last_record(fd):
    """Find the last record of the .castep file.

    Returns
    -------
    start : int
        Line number of the first line of the last record.
    end : int
        Line number of the last line of the last record.
    end_found : bool
        True if the .castep file ends as expected.

    """
    start = -1
    for i, line in enumerate(fd):
        if (('Welcome' in line or 'Materials Studio' in line)
                and 'CASTEP' in line):
            start = i

    if start < 0:
        warnings.warn(
            f'Could not find CASTEP label in result file: {fd.name}.'
            ' Are you sure this is a .castep file?'
        )
        return None

    # search for regular end of file
    end_found = False
    # start to search from record beginning from the back
    # and see if
    end = -1
    fd.seek(0)
    for i, line in enumerate(fd):
        if i < start:
            continue
        if 'Finalisation time   =' in line:
            end_found = True
            end = i
            break

    return (start, end, end_found)


def _read_header(out: io.TextIOBase):
    """Read the header blocks from a .castep file.

    Returns
    -------
    parameters : dict
        Dictionary storing keys and values of a .param file.
    """
    def _parse_on_off(_: str):
        return {'on': True, 'off': False}[_]

    read_title = False
    parameters: Dict[str, Any] = {}
    for line in out:
        if len(line) == 0:  # end of file
            break
        if re.search(r'^\s*\*+$', line) and read_title:  # end of header
            break

        if re.search(r'\**.* Title \**', line):
            read_title = True

        # General Parameters

        elif 'output verbosity' in line:
            parameters['iprint'] = int(line.split()[-1][1])
        elif re.match(r'\stype of calculation\s*:', line):
            parameters['task'] = {
                'single point energy': 'SinglePoint',
                'geometry optimization': 'GeometryOptimization',
                'band structure': 'BandStructure',
                'molecular dynamics': 'MolecularDynamics',
                'optical properties': 'Optics',
                'phonon calculation': 'Phonon',
                'E-field calculation': 'Efield',
                'Phonon followed by E-field': 'Phonon+Efield',
                'transition state search': 'TransitionStateSearch',
                'Magnetic Resonance': 'MagRes',
                'Core level spectra': 'Elnes',
                'Electronic Spectroscopy': 'ElectronicSpectroscopy',
            }[line.split(':')[-1].strip()]
        elif 'stress calculation' in line:
            parameters['calculate_stress'] = _parse_on_off(line.split()[-1])
        elif 'calculation limited to maximum' in line:
            parameters['run_time'] = float(line.split()[-2])
        elif re.match(r'\soptimization strategy\s*:', line):
            parameters['opt_strategy'] = {
                'maximize speed(+++)': 'Speed',
                'minimize memory(---)': 'Memory',
                'balance speed and memory': 'Default',
            }[line.split(':')[-1].strip()]

        # Exchange-Correlation Parameters
        elif re.match(r'\susing functional\s*:', line):
            functional_abbrevs = {
                'Local Density Approximation': 'LDA',
                'Perdew Wang (1991)': 'PW91',
                'Perdew Burke Ernzerhof': 'PBE',
                'revised Perdew Burke Ernzerhof': 'RPBE',
                'PBE with Wu-Cohen exchange': 'WC',
                'PBE for solids (2008)': 'PBESOL',
                'Hartree-Fock': 'HF',
                'Hartree-Fock +': 'HF-LDA',
                'Screened Hartree-Fock': 'sX',
                'Screened Hartree-Fock + ': 'sX-LDA',
                'hybrid PBE0': 'PBE0',
                'hybrid B3LYP': 'B3LYP',
                'hybrid HSE03': 'HSE03',
                'hybrid HSE06': 'HSE06',
                'RSCAN': 'RSCAN',
            }

            # If the name is not recognised, use the whole string.
            # This won't work in a new calculation, so will need to load from
            # .param file in such cases... but at least it will fail rather
            # than use the wrong XC!
            _xc_full_name = line.split(':')[-1].strip()
            parameters['xc_functional'] = functional_abbrevs.get(
                _xc_full_name, _xc_full_name)

        elif 'DFT+D: Semi-empirical dispersion correction' in line:
            parameters['sedc_apply'] = _parse_on_off(line.split()[-1])
        elif 'SEDC with' in line:
            parameters['sedc_scheme'] = {
                'OBS correction scheme': 'OBS',
                'G06 correction scheme': 'G06',
                'D3 correction scheme': 'D3',
                'D3(BJ) correction scheme': 'D3-BJ',
                'D4 correction scheme': 'D4',
                'JCHS correction scheme': 'JCHS',
                'TS correction scheme': 'TS',
                'TSsurf correction scheme': 'TSSURF',
                'TS+SCS correction scheme': 'TSSCS',
                'aperiodic TS+SCS correction scheme': 'ATSSCS',
                'aperiodic MBD@SCS method': 'AMBD',
                'MBD@SCS method': 'MBD',
                'aperiodic MBD@rsSCS method': 'AMBD*',
                'MBD@rsSCS method': 'MBD*',
                'XDM correction scheme': 'XDM',
            }[line.split(':')[-1].strip()]

        # Basis Set Parameters

        elif 'basis set accuracy' in line:
            parameters['basis_precision'] = line.split()[-1]
        elif 'plane wave basis set cut-off' in line:
            parameters['cut_off_energy'] = float(line.split()[-2])
        elif re.match(r'\sfinite basis set correction\s*:', line):
            parameters['finite_basis_corr'] = {
                'none': 0,
                'manual': 1,
                'automatic': 2,
            }[line.split()[-1]]

        # Electronic Parameters

        elif 'treating system as spin-polarized' in line:
            parameters['spin_polarized'] = True

        # Electronic Minimization Parameters

        elif 'Treating system as non-metallic' in line:
            parameters['fix_occupancy'] = True
        elif 'total energy / atom convergence tol.' in line:
            parameters['elec_energy_tol'] = float(line.split()[-2])
        elif 'convergence tolerance window' in line:
            parameters['elec_convergence_win'] = int(line.split()[-2])
        elif 'max. number of SCF cycles:' in line:
            parameters['max_scf_cycles'] = float(line.split()[-1])
        elif 'dump wavefunctions every' in line:
            parameters['num_dump_cycles'] = float(line.split()[-3])

        # Density Mixing Parameters

        elif 'density-mixing scheme' in line:
            parameters['mixing_scheme'] = line.split()[-1]

    return parameters


def _read_unit_cell(out: io.TextIOBase):
    """Read a Unit Cell block from a .castep file."""
    for line in out:
        fields = line.split()
        if len(fields) == 6:
            break
    lattice_real = []
    for _ in range(3):
        lattice_real.append([float(f) for f in fields[0:3]])
        line = out.readline()
        fields = line.split()
    return np.array(lattice_real)


def _read_forces(out: io.TextIOBase, n_atoms: int):
    """Read a block for atomic forces from a .castep file."""
    constraints: List[FixConstraint] = []
    forces = []
    for line in out:
        fields = line.split()
        if len(fields) == 7:
            break
    for n in range(n_atoms):
        consd = np.array([0, 0, 0])
        fxyz = [0.0, 0.0, 0.0]
        for i, force_component in enumerate(fields[-4:-1]):
            if force_component.count("(cons'd)") > 0:
                consd[i] = 1
            # remove constraint labels in force components
            fxyz[i] = float(force_component.replace("(cons'd)", ''))
        if consd.all():
            constraints.append(FixAtoms(n))
        elif consd.any():
            constraints.append(FixCartesian(n, consd))
        forces.append(fxyz)
        line = out.readline()
        fields = line.split()
    return forces, constraints


def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int):
    """Read fractional coordinates from a .castep file."""
    species: List[str] = []
    custom_species: Optional[List[str]] = None  # A CASTEP special thing
    positions_frac: List[List[float]] = []
    for line in out:
        fields = line.split()
        if len(fields) == 7:
            break
    for _ in range(n_atoms):
        spec_custom = fields[1].split(':', 1)
        elem = spec_custom[0]
        if len(spec_custom) > 1 and custom_species is None:
            # Add it to the custom info!
            custom_species = list(species)
        species.append(elem)
        if custom_species is not None:
            custom_species.append(fields[1])
        positions_frac.append([float(s) for s in fields[3:6]])
        line = out.readline()
        fields = line.split()
    return species, custom_species, positions_frac


def _read_stress(out: io.TextIOBase):
    """Read a block for the stress tensor from a .castep file."""
    for line in out:
        fields = line.split()
        if len(fields) == 6:
            break
    results = {}
    stress = []
    for _ in range(3):
        stress.append([float(s) for s in fields[2:5]])
        line = out.readline()
        fields = line.split()
    # stress in .castep file is given in GPa
    results['stress'] = np.array(stress) * units.GPa
    results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]]
    line = out.readline()
    if "Pressure:" in line:
        results['pressure'] = float(
            line.split()[-2]) * units.GPa  # type: ignore[assignment]
    return results


def _add_atoms(
    images,
    lattice_real,
    species,
    custom_species,
    positions_frac,
    constraints,
    results,
):
    # If all the lattice parameters are fixed,
    # the `Unit Cell` block in the .castep file is not printed
    # in every ionic step.
    # The lattice paramters are therefore taken from the last step.
    # This happens:
    # - `GeometryOptimization`: `FIX_ALL_CELL : TRUE`
    # - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT`
    if lattice_real is None:
        lattice_real = images[-1].cell.copy()

    # for highly symmetric systems (where essentially only the
    # stress is optimized, but the atomic positions) positions
    # are only printed once.
    if species is None:
        species = images[-1].symbols
    if positions_frac is None:
        positions_frac = images[-1].get_scaled_positions()

    _set_energy_and_free_energy(results)

    atoms = Atoms(
        species,
        cell=lattice_real,
        constraint=constraints,
        pbc=True,
        scaled_positions=positions_frac,
    )
    if custom_species is not None:
        atoms.new_array(
            'castep_custom_species',
            np.array(custom_species),
        )

    atoms.calc = SinglePointCalculator(atoms)
    atoms.calc.results = results

    images.append(atoms)


def _read_mulliken_charges(out: io.TextIOBase):
    """Read a block for Mulliken charges from a .castep file."""
    for i in range(3):
        line = out.readline()
        if i == 1:
            spin_polarized = 'Spin' in line
    results = defaultdict(list)
    for line in out:
        fields = line.split()
        if len(fields) == 1:
            break
        if spin_polarized:
            if len(fields) != 7:  # due to CASTEP 18 outformat changes
                results['charges'].append(float(fields[-2]))
                results['magmoms'].append(float(fields[-1]))
        else:
            results['charges'].append(float(fields[-1]))
    return {k: np.array(v) for k, v in results.items()}


def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int):
    """Read the Hirshfeld analysis when iprint > 1 from a .castep file."""
    results = defaultdict(list)
    for _ in range(n_atoms):
        for line in out:
            if line.strip() == '':
                break  # end for each atom
            if 'Hirshfeld / free atomic volume :' in line:
                line = out.readline()
                fields = line.split()
                results['hirshfeld_volume_ratios'].append(float(fields[0]))
    return {k: np.array(v) for k, v in results.items()}


def _read_hirshfeld_charges(out: io.TextIOBase):
    """Read a block for Hirshfeld charges from a .castep file."""
    for i in range(3):
        line = out.readline()
        if i == 1:
            spin_polarized = 'Spin' in line
    results = defaultdict(list)
    for line in out:
        fields = line.split()
        if len(fields) == 1:
            break
        if spin_polarized:
            results['hirshfeld_charges'].append(float(fields[-2]))
            results['hirshfeld_magmoms'].append(float(fields[-1]))
        else:
            results['hirshfeld_charges'].append(float(fields[-1]))
    return {k: np.array(v) for k, v in results.items()}


def _set_energy_and_free_energy(results: Dict[str, Any]):
    """Set values referred to as `energy` and `free_energy`."""
    if 'energy_with_dispersion_correction' in results:
        suffix = '_with_dispersion_correction'
    else:
        suffix = '_without_dispersion_correction'

    if 'free_energy' + suffix in results:  # metallic
        keye = 'energy_zero' + suffix
        keyf = 'free_energy' + suffix
    else:  # non-metallic
        # The finite basis set correction is applied to the energy at finite T
        # (not the energy at 0 K). We should hence refer to the corrected value
        # as `energy` only when the free energy is unavailable, i.e., only when
        # FIX_OCCUPANCY : TRUE and thus no smearing is applied.
        if 'energy_with_finite_basis_set_correction' in results:
            keye = 'energy_with_finite_basis_set_correction'
        else:
            keye = 'energy' + suffix
        keyf = 'energy' + suffix

    results['energy'] = results[keye]
    results['free_energy'] = results[keyf]