File: vasp_outcar_parsers.py

package info (click to toggle)
python-ase 3.24.0-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 15,448 kB
  • sloc: python: 144,945; xml: 2,728; makefile: 113; javascript: 47
file content (870 lines) | stat: -rw-r--r-- 29,544 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
"""
Module for parsing OUTCAR files.
"""
import re
from abc import ABC, abstractmethod
from pathlib import Path, PurePath
from typing import Any, Dict, Iterator, List, Optional, Sequence, TextIO, Union
from warnings import warn

import numpy as np

import ase
from ase import Atoms
from ase.calculators.singlepoint import (
    SinglePointDFTCalculator,
    SinglePointKPoint,
)
from ase.data import atomic_numbers
from ase.io import ParseError, read
from ase.io.utils import ImageChunk

# Denotes end of Ionic step for OUTCAR reading.
# Used below both as the line delimiter of the Energy parser and as the
# marker at which build_chunk() stops collecting lines for one image.
_OUTCAR_SCF_DELIM = 'FREE ENERGIE OF THE ION-ELECTRON SYSTEM'

# Some type aliases, used in the signatures throughout this module
_HEADER = Dict[str, Any]  # Parsed header data, keyed by property name
_CURSOR = int  # Index of a line within a sequence of lines
_CHUNK = Sequence[str]  # The lines making up a header or a single image
_RESULT = Dict[str, Any]  # Properties returned from a parser


class NoNonEmptyLines(Exception):
    """Raised when only empty lines are left in the provided chunk"""


class UnableToLocateDelimiter(Exception):
    """Raised when a delimiter could not be found.

    The delimiter which was searched for is stored on the
    ``delimiter`` attribute, the message is passed on to Exception.
    """

    def __init__(self, delimiter, msg):
        super().__init__(msg)
        self.delimiter = delimiter


def _check_line(line: str) -> str:
    """Auxiliary check line function for OUTCAR numeric formatting.
    See issue #179, https://gitlab.com/ase/ase/issues/179
    Only call in cases we need the numeric values

    Inserts a space in front of every minus sign which is directly
    surrounded by digits, so that numbers which were written without
    any separating whitespace can be split again, e.g.
    "1.0-2.0" -> "1.0 -2.0".

    Exponents such as "1.0E-05" are left alone, as the character in front
    of the minus sign is not a digit.
    """
    # Lookbehind/lookahead do not consume the digits, so a digit which
    # sits between two minus signs (e.g. "1-2-3") is handled for both.
    # A consuming pattern like '([0-9])-([0-9])' would skip the second
    # minus sign, since re.sub only replaces non-overlapping matches.
    return re.sub(r'(?<=[0-9])-(?=[0-9])', ' -', line)


def find_next_non_empty_line(cursor: _CURSOR, lines: _CHUNK) -> _CURSOR:
    """Return the position of the first non-empty line, starting the
    search at the cursor position (the cursor line itself is included).

    A line is considered empty if nothing is left after stripping
    the whitespace.
    Raises NoNonEmptyLines if all of the remaining lines are empty.
    """
    for position, text in enumerate(lines[cursor:], start=cursor):
        if text.strip():
            return position
    # Ran out of lines without finding any content
    raise NoNonEmptyLines("Did not find a next line which was not empty")


def search_lines(delim: str, cursor: _CURSOR, lines: _CHUNK) -> _CURSOR:
    """Return the position of the first line containing the delimiter,
    starting the search at the cursor position (the cursor line itself
    is included).

    Raises UnableToLocateDelimiter if no such line exists."""
    for position, text in enumerate(lines[cursor:], start=cursor):
        if delim not in text:
            continue
        # Sanity check: the position must point at the matching line
        assert delim in lines[position]
        return position
    raise UnableToLocateDelimiter(
        delim, f'Did not find starting point for delimiter {delim}')


def convert_vasp_outcar_stress(stress: Sequence):
    """Helper function to convert the 6 stress components found in
    an OUTCAR into the stress array used by ASE.

    The sign of the values is flipped, the components are reordered
    with the indices [0, 1, 2, 4, 5, 3] and the result is multiplied by
    1e-1 * ase.units.GPa (the values are read from the "in kB" line).

    Raises ValueError if the input does not have the shape (6,).
    """
    flipped = -np.array(stress)
    if flipped.shape != (6, ):
        raise ValueError(
            f'Stress has the wrong shape. Expected (6,), got {flipped.shape}')
    # NOTE(review): presumably maps the OUTCAR order (XX YY ZZ XY YZ ZX)
    # onto the Voigt order (xx yy zz yz xz xy) - confirm
    reorder = [0, 1, 2, 4, 5, 3]
    return flipped[reorder] * 1e-1 * ase.units.GPa


def read_constraints_from_file(directory):
    """Read the constraints from the CONTCAR or POSCAR in a directory.

    The CONTCAR is tried first, then the POSCAR. The constraints of
    the first file which exists are returned.
    Returns None if neither of the files exist."""
    workdir = Path(directory)
    for candidate in ('CONTCAR', 'POSCAR'):
        path = workdir / candidate
        if not path.is_file():
            continue
        return read(path, format='vasp', parallel=False).constraints
    return None


class VaspPropertyParser(ABC):
    """Abstract base class of all the property parsers.

    A parser is first asked if it recognizes the line at the cursor with
    "has_property", and if so, "parse" is used to extract the property."""
    NAME = None  # type: str

    @classmethod
    def get_name(cls):
        """Name of parser. Override the NAME constant in the class to
        specify a custom name,
        otherwise the class name is used"""
        if cls.NAME:
            return cls.NAME
        return cls.__name__

    @abstractmethod
    def has_property(self, cursor: _CURSOR, lines: _CHUNK) -> bool:
        """Function which checks if a property can be derived from a given
        cursor position"""

    @staticmethod
    def get_line(cursor: _CURSOR, lines: _CHUNK) -> str:
        """Helper function to get the line at the cursor position,
        after it has been passed through the _check_line function"""
        raw_line = lines[cursor]
        return _check_line(raw_line)

    @abstractmethod
    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        """Extract a property from the cursor position.
        Assumes that "has_property" would evaluate to True
        from cursor position """


class SimpleProperty(VaspPropertyParser, ABC):
    """Base class for properties which are recognized by a single
    delimiter string being present in the line at the cursor.

    Subclasses must set the LINE_DELIMITER constant."""
    LINE_DELIMITER = None  # type: str

    def __init__(self):
        super().__init__()
        delimiter_missing = self.LINE_DELIMITER is None
        if delimiter_missing:
            raise ValueError('Must specify a line delimiter.')

    def has_property(self, cursor, lines) -> bool:
        """True if the delimiter is contained in the cursor line"""
        return self.LINE_DELIMITER in lines[cursor]


class VaspChunkPropertyParser(VaspPropertyParser, ABC):
    """Base class for parsing a chunk of the OUTCAR.
    The base assumption is that only a chunk of lines is passed.

    The parsed header is available to the parser
    through the "header" attribute."""

    def __init__(self, header: _HEADER = None):
        super().__init__()
        # Fall back to an empty header if nothing (or None) was given
        self.header = header if header else {}

    def get_from_header(self, key: str) -> Any:
        """Get a key from the header, and raise a ParseError
        if that key doesn't exist"""
        try:
            value = self.header[key]
        except KeyError:
            message = (
                'Parser requested unavailable key "{}" from header'.format(
                    key))
            raise ParseError(message)
        return value


class VaspHeaderPropertyParser(VaspPropertyParser, ABC):
    """Base class for the parsers which extract a property
    from the header of an OUTCAR"""


class SimpleVaspChunkParser(VaspChunkPropertyParser, SimpleProperty, ABC):
    """Base class for properties in a chunk, whose existence can be
    determined from a single line containing the LINE_DELIMITER"""


class SimpleVaspHeaderParser(VaspHeaderPropertyParser, SimpleProperty, ABC):
    """Base class for properties in the header, whose existence can be
    determined from a single line containing the LINE_DELIMITER"""


class Spinpol(SimpleVaspHeaderParser):
    """Parse if the calculation is spin-polarized.

    Example line:
    "   ISPIN  =      2    spin polarized calculation?"

    The result is stored under the key 'spinpol'.
    """
    LINE_DELIMITER = 'ISPIN'

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        # Tokens of the example line: ['ISPIN', '=', '2', 'spin', ...]
        tokens = lines[cursor].strip().split()
        # Only ISPIN = 2 is spin-polarized, any other value is not
        return {'spinpol': int(tokens[2]) == 2}


class SpeciesTypes(SimpleVaspHeaderParser):
    """Parse species types.

    Example line:
    " POTCAR:    PAW_PBE Ni 02Aug2007"

    The line is scattered throughout the header, so this parser is
    called multiple times, and accumulates the symbols it finds.
    Every call to "parse" returns the list of species found so far.
    """
    LINE_DELIMITER = 'POTCAR:'

    def __init__(self, *args, **kwargs):
        # Every symbol we find is stored here, in the order of appearance.
        # This includes the repeated entries of the OUTCAR,
        # see get_species() for the list without the repetition.
        self._species = []
        super().__init__(*args, **kwargs)

    @property
    def species(self) -> List[str]:
        """Internal storage of each found line.
        Will contain the double counting.
        Use the get_species() method to get the un-doubled list."""
        return self._species

    def get_species(self) -> List[str]:
        """The OUTCAR will contain two 'POTCAR:' entries per species.
        This method only returns the first half,
        effectively removing the double counting.
        """
        found = self.species
        # Half of the entries, rounded up in case of an odd number.
        # An odd number should never happen in a real OUTCAR,
        # but tests like to just add the species 1-by-1.
        half = (len(found) + 1) // 2
        # Slicing gives us a new list, but be explicit about the copy
        return list(found[:half])

    def _make_returnval(self) -> _RESULT:
        """Construct the return value for the "parse" method"""
        return {'species': self.get_species()}

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        line = lines[cursor].strip()
        tokens = line.split()

        # Position of the token which holds the symbol:
        # An AE potential (currently only H_AE) looks like
        # "  H  1/r potential  ", so the symbol is token 1.
        # A regular PAW potential looks like
        # "PAW_PBE H1.25 07Sep2000" or "PAW_PBE Fe_pv 02Aug2007",
        # so the symbol is token 2.
        symbol_pos = 1 if '1/r potential' in line else 2

        # Drop the "_h", "_GW", "_3" tags etc.
        base = tokens[symbol_pos].split('_')[0]
        # Drop any non-alphabetic characters,
        # for the "H1.25" potentials etc.
        sym = ''.join(filter(str.isalpha, base))

        # Ensure what we are left with is actually an element
        if sym not in atomic_numbers:
            raise ParseError(
                f'Found an unexpected symbol {sym} in line {line}')

        self.species.append(sym)
        return self._make_returnval()


class IonsPerSpecies(SimpleVaspHeaderParser):
    """Parse the number of ions of each type.

    Example line:

    "   ions per type =              32  31   2"

    The counts are stored as a list of integers under the key 'ion_types'.
    """
    LINE_DELIMITER = 'ions per type'

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        tokens = lines[cursor].strip().split()
        # The first 4 tokens are 'ions', 'per', 'type' and '='
        return {'ion_types': [int(count) for count in tokens[4:]]}


class KpointHeader(VaspHeaderPropertyParser):
    """Reads nkpts and nbands from the line containing both "NKPTS"
    and "NBANDS". Then it also searches for the ibzkpts and kpt_weights"""

    def has_property(self, cursor: _CURSOR, lines: _CHUNK) -> bool:
        candidate = lines[cursor]
        return all(tag in candidate for tag in ("NKPTS", "NBANDS"))

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        """Returns the keys 'nkpts', 'nbands', 'ibzkpts' and 'kpt_weights'.

        Raises a ParseError if the table of k-points was not found
        at or after the cursor position."""
        header_tokens = lines[cursor].strip().split()
        nkpts = int(header_tokens[3])
        nbands = int(header_tokens[-1])

        results: Dict[str, Any] = {'nkpts': nkpts, 'nbands': nbands}

        # The k-points and their weights are parsed here as well,
        # since we need the number of k-points to read the table.
        # First we locate the line which announces the table.
        delim2 = 'k-points in reciprocal lattice and weights'
        for position, text in enumerate(lines[cursor:], start=cursor):
            if delim2 in text:
                # The table starts on the line after the delimiter
                first_row = position + 1
                break
        else:
            raise ParseError('Did not find the K-points in the OUTCAR')

        ibzkpts = np.zeros((nkpts, 3))
        kpt_weights = np.zeros(nkpts)
        for nk in range(nkpts):
            fields = lines[first_row + nk].split()
            # First 3 columns are the k-point, the last is the weight
            ibzkpts[nk] = [float(value) for value in fields[:3]]
            kpt_weights[nk] = float(fields[-1])
        results['ibzkpts'] = ibzkpts
        results['kpt_weights'] = kpt_weights

        return results


class Stress(SimpleVaspChunkParser):
    """Process the stress from an OUTCAR.

    The result is stored under the key 'stress', and is None
    if the values on the line cannot be converted to floats."""
    LINE_DELIMITER = 'in kB '

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        # The first two tokens are 'in' and 'kB', the rest are the values
        fields = self.get_line(cursor, lines).split()[2:]
        try:
            stress = [float(field) for field in fields]
        except ValueError:
            # Vasp FORTRAN string formatting issues, can happen with
            # some bad geometry steps Alternatively, we can re-raise
            # as a ParseError?
            warn('Found badly formatted stress line. Setting stress to None.')
            return {'stress': None}
        return {'stress': convert_vasp_outcar_stress(stress)}


class Cell(SimpleVaspChunkParser):
    """Parse the cell, i.e. the first 3 columns of the 3 lines which
    follow the 'direct lattice vectors' line.

    The result is stored as a (3, 3) array under the key 'cell'."""
    LINE_DELIMITER = 'direct lattice vectors'

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        first_row = cursor + 1  # The vectors start below the delimiter
        cell = np.zeros((3, 3))
        for row in range(3):
            tokens = self.get_line(first_row + row, lines).split()
            cell[row] = [float(token) for token in tokens[:3]]
        return {'cell': cell}


class PositionsAndForces(SimpleVaspChunkParser):
    """Positions and forces are written in the same block.
    We parse both simultaneously.

    Requires 'natoms' to be present in the header. The results are stored
    as (natoms, 3) arrays under the keys 'positions' and 'forces'."""
    LINE_DELIMITER = 'POSITION          '

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        natoms = self.get_from_header('natoms')
        positions = np.zeros((natoms, 3))
        forces = np.zeros((natoms, 3))

        # The table starts 2 lines below the delimiter line
        first_row = cursor + 2
        for atom in range(natoms):
            row = self.get_line(first_row + atom, lines)
            values = [float(token) for token in row.split()]
            # Columns 0-2 are the position, 3-5 the force
            positions[atom] = values[0:3]
            forces[atom] = values[3:6]
        return {'positions': positions, 'forces': forces}


class Magmom(VaspChunkPropertyParser):
    """Parse the values which follow the word 'magnetization' on a
    'number of electron' line.

    The result is stored under the key 'magmom'. It is a float if there
    is exactly 1 value, otherwise it is an array of the values."""

    def has_property(self, cursor: _CURSOR, lines: _CHUNK) -> bool:
        """ We need to check for two separate delimiter strings,
        to ensure we are at the right place """
        line = lines[cursor]
        if 'number of electron' not in line:
            return False
        parts = line.split()
        # Reject lines which are too short, or which start with NELECT
        return len(parts) > 5 and parts[0].strip() != "NELECT"

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        tokens = self.get_line(cursor, lines).split()
        # Everything after the 'magnetization' token is a value
        values = tokens[tokens.index('magnetization') + 1:]
        magmom: Union[np.ndarray, float]
        if len(values) == 1:
            magmom = float(values[0])
        else:
            magmom = np.array([float(value) for value in values])
        return {'magmom': magmom}


class Magmoms(VaspChunkPropertyParser):
    """Parse the magnetic moments of the individual atoms.

    Requires 'natoms' to be present in the header.
    The result is stored under the key 'magmoms'. It is an array of
    length natoms, or of shape (natoms, 3) if a 'magnetization (y)' block
    was detected after the 'magnetization (x)' block.
    """
    # Updated by "has_property" whenever the x block is found.
    # Defined on the class, so "parse" can also be called on a parser
    # where "has_property" has not been called yet, instead of
    # raising an AttributeError.
    non_collinear = False

    # Number of lines between the block header and the first atom
    _HEADER_SKIP = 4
    # Number of lines, besides the natoms atom lines, between the headers
    # of two consecutive blocks (x -> y -> z)
    _BLOCK_EXTRA = 9

    def has_property(self, cursor: _CURSOR, lines: _CHUNK) -> bool:
        """True if the cursor is on the 'magnetization (x)' line.

        As a side effect, it is recorded in "non_collinear" whether the
        'magnetization (y)' line is found where the next block is
        expected to start."""
        line = lines[cursor]
        if 'magnetization (x)' not in line:
            return False
        natoms = self.get_from_header('natoms')
        y_cursor = cursor + natoms + self._BLOCK_EXTRA
        self.non_collinear = (
            y_cursor < len(lines)
            and 'magnetization (y)' in self.get_line(y_cursor, lines))
        return True

    def _read_block(self, cursor: _CURSOR, lines: _CHUNK,
                    natoms: int) -> List[float]:
        """Read the last column of the natoms lines of the block
        whose header is on the cursor line"""
        first_row = cursor + self._HEADER_SKIP
        return [
            float(self.get_line(first_row + i, lines).split()[-1])
            for i in range(natoms)
        ]

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        natoms = self.get_from_header('natoms')
        if self.non_collinear:
            magmoms = np.zeros((natoms, 3))
            # The x, y and z blocks follow each other at a fixed distance
            stride = natoms + self._BLOCK_EXTRA
            for axis in range(3):
                magmoms[:, axis] = self._read_block(
                    cursor + axis * stride, lines, natoms)
        else:
            magmoms = np.zeros(natoms)
            magmoms[:] = self._read_block(cursor, lines, natoms)

        return {'magmoms': magmoms}


class EFermi(SimpleVaspChunkParser):
    """Parse the value following the 'E-fermi :' delimiter.

    The result is stored as a float under the key 'efermi'."""
    LINE_DELIMITER = 'E-fermi :'

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        # Tokens: ['E-fermi', ':', '<value>', ...]
        tokens = self.get_line(cursor, lines).split()
        return {'efermi': float(tokens[2])}


class Energy(SimpleVaspChunkParser):
    """Parse the energies from the lines below the delimiter
    which marks the end of an ionic step.

    The results are stored under the keys 'free_energy' and 'energy'."""
    LINE_DELIMITER = _OUTCAR_SCF_DELIM

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        # Force consistent energy: token 4, 2 lines below the delimiter
        free_tokens = self.get_line(cursor + 2, lines).split()
        energy_free = float(free_tokens[4])

        # Energy extrapolated to 0 K: token 6, 4 lines below the delimiter
        zero_tokens = self.get_line(cursor + 4, lines).split()
        energy_zero = float(zero_tokens[6])

        return {'free_energy': energy_free, 'energy': energy_zero}


class Kpoints(VaspChunkPropertyParser):
    """Parse the eigenvalues and occupations of every k-point.

    Requires 'nkpts', 'nbands', 'kpt_weights' and 'spinpol' to be present
    in the header. The result is a list of SinglePointKPoint objects,
    stored under the key 'kpts'."""

    def has_property(self, cursor: _CURSOR, lines: _CHUNK) -> bool:
        """True if the cursor line contains 'spin component 1', and
        consists of exactly 3 parts, where the last part is an integer."""
        line = lines[cursor]
        # Example line:
        # " spin component 1" or " spin component 2"
        # We only check spin up, as if we are spin-polarized, we'll parse that
        # as well
        if 'spin component 1' in line:
            parts = line.strip().split()
            # This string is repeated elsewhere, but not with this exact shape
            if len(parts) == 3:
                try:
                    # The last part of the line should be an integer, denoting
                    # spin-up or spin-down
                    int(parts[-1])
                except ValueError:
                    pass
                else:
                    return True
        return False

    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        """Read nkpts k-point blocks of nbands bands each, for 1 spin
        component, or for 2 if the header says we are spin-polarized."""
        nkpts = self.get_from_header('nkpts')
        nbands = self.get_from_header('nbands')
        weights = self.get_from_header('kpt_weights')
        spinpol = self.get_from_header('spinpol')
        nspins = 2 if spinpol else 1

        kpts = []
        for spin in range(nspins):
            # for Vasp 6, they added some extra information after the
            # spin components.  so we might need to seek the spin
            # component line
            # For spin 0 the search starts on the line we were called with,
            # for spin 1 it continues from where the spin 0 bands ended.
            cursor = search_lines(f'spin component {spin + 1}', cursor, lines)

            cursor += 2  # Skip two lines
            for _ in range(nkpts):
                # Skip empty lines
                cursor = find_next_non_empty_line(cursor, lines)

                line = self.get_line(cursor, lines)
                # Example line:
                # "k-point     1 :       0.0000    0.0000    0.0000"
                parts = line.strip().split()
                ikpt = int(parts[1]) - 1  # Make kpt idx start from 0
                weight = weights[ikpt]

                cursor += 2  # Move down two
                eigenvalues = np.zeros(nbands)
                occupations = np.zeros(nbands)
                for n in range(nbands):
                    # Example line:
                    # "      1      -9.9948      1.00000"
                    # Note that the raw line is used here, i.e. it is not
                    # passed through get_line. The first column is skipped,
                    # and exactly 2 values must remain for the unpacking.
                    parts = lines[cursor].strip().split()
                    eps_n, f_n = map(float, parts[1:])
                    occupations[n] = f_n
                    eigenvalues[n] = eps_n
                    cursor += 1
                kpt = SinglePointKPoint(weight,
                                        spin,
                                        ikpt,
                                        eps_n=eigenvalues,
                                        f_n=occupations)
                kpts.append(kpt)

        return {'kpts': kpts}


class DefaultParsersContainer:
    """Container for the default OUTCAR parsers.
    Allows for modification of the global default parsers.

    Takes in an arbitrary number of parser classes, which are stored
    by their name. The classes should be passed in uninitialized,
    as new instances are created on every request.
    """

    def __init__(self, *parsers_cls):
        self._parsers_dct = {}
        for parser_cls in parsers_cls:
            self.add_parser(parser_cls)

    @property
    def parsers_dct(self) -> dict:
        """The stored parser classes, with the parser name as the key"""
        return self._parsers_dct

    def make_parsers(self):
        """Return a list of new parser instances,
        one for each of the stored parser classes."""
        instances = []
        for parser_cls in self.parsers_dct.values():
            instances.append(parser_cls())
        return instances

    def remove_parser(self, name: str):
        """Remove a parser based on the name.
        The name must match the parser name exactly,
        a KeyError is raised if no such parser is stored."""
        del self.parsers_dct[name]

    def add_parser(self, parser) -> None:
        """Add a parser class. If a parser of the same name
        is already stored, it will be replaced."""
        name = parser.get_name()
        self.parsers_dct[name] = parser


class TypeParser(ABC):
    """Base class for parsing a type, e.g. header or chunk,
    by applying the internal attached parsers"""

    def __init__(self, parsers):
        # Goes through the setter, so the parsers are checked
        self.parsers = parsers

    @property
    def parsers(self):
        """The attached parsers. They are checked with "_check_parsers"
        when assigned."""
        return self._parsers

    @parsers.setter
    def parsers(self, new_parsers) -> None:
        self._check_parsers(new_parsers)
        self._parsers = new_parsers

    @abstractmethod
    def _check_parsers(self, parsers) -> None:
        """Check the parsers are of correct type"""

    def parse(self, lines) -> _RESULT:
        """Execute the attached parsers, and return the parsed properties.

        Every attached parser is offered every line, in order."""
        collected = {}
        for cursor, _ in enumerate(lines):
            for parser in self.parsers:
                if not parser.has_property(cursor, lines):
                    continue
                # Note: A property which we already found on a previous
                # line is overridden here. This is usually correct,
                # as some VASP settings can cause certain pieces of
                # information to be written multiple times during SCF.
                # We are only interested in the final values within
                # a given chunk.
                collected.update(parser.parse(cursor, lines))
        return collected


class ChunkParser(TypeParser, ABC):
    """Base class for parsing a chunk, using parsers of the type
    VaspChunkPropertyParser. The header is passed on to all of the
    attached parsers whenever it is assigned."""

    def __init__(self, parsers, header=None):
        super().__init__(parsers)
        # Goes through the setter, which also updates the parsers
        self.header = header

    @property
    def header(self) -> _HEADER:
        return self._header

    @header.setter
    def header(self, value: Optional[_HEADER]) -> None:
        # None (or an empty header) is replaced by a new empty dict
        self._header = value if value else {}
        self.update_parser_headers()

    def update_parser_headers(self) -> None:
        """Apply the header to all available parsers"""
        shared_header = self.header
        for parser in self.parsers:
            parser.header = shared_header

    def _check_parsers(self,
                       parsers: Sequence[VaspChunkPropertyParser]) -> None:
        """Check the parsers are of correct type 'VaspChunkPropertyParser'"""
        for parser in parsers:
            if not isinstance(parser, VaspChunkPropertyParser):
                raise TypeError(
                    'All parsers must be of type VaspChunkPropertyParser')

    @abstractmethod
    def build(self, lines: _CHUNK) -> Atoms:
        """Construct an atoms object of the chunk from the parsed results"""


class HeaderParser(TypeParser, ABC):
    """Base class for parsing the header, using parsers of the type
    VaspHeaderPropertyParser."""

    def _check_parsers(self,
                       parsers: Sequence[VaspHeaderPropertyParser]) -> None:
        """Check the parsers are of correct type 'VaspHeaderPropertyParser'"""
        for parser in parsers:
            if not isinstance(parser, VaspHeaderPropertyParser):
                raise TypeError(
                    'All parsers must be of type VaspHeaderPropertyParser')

    @abstractmethod
    def build(self, lines: _CHUNK) -> _HEADER:
        """Construct the header object from the parsed results"""


class OutcarChunkParser(ChunkParser):
    """Class for parsing a chunk of an OUTCAR.

    If no parsers are given, new instances of the parsers
    in "default_chunk_parsers" are used."""

    def __init__(self,
                 header: _HEADER = None,
                 parsers: Sequence[VaspChunkPropertyParser] = None):
        if not parsers:
            parsers = default_chunk_parsers.make_parsers()
        super().__init__(parsers, header=header)

    def build(self, lines: _CHUNK) -> Atoms:
        """Apply outcar chunk parsers, and build an atoms object.

        The symbols (and the constraint, if any) are taken from the header.
        The positions and the cell must be found by the parsers,
        otherwise a ParseError is raised. All the other parsed properties
        are attached through a SinglePointDFTCalculator."""
        self.update_parser_headers()  # Ensure header is in sync

        results = self.parse(lines)
        atoms_kwargs = dict(symbols=self.header['symbols'],
                            constraint=self.header.get('constraint', None),
                            pbc=True)

        # The required properties are moved out of the parsed results,
        # and used for constructing the atoms instead
        for prop in ('positions', 'cell'):
            try:
                value = results.pop(prop)
            except KeyError:
                raise ParseError(
                    'Did not find required property {} during parse.'.format(
                        prop))
            atoms_kwargs[prop] = value
        atoms = Atoms(**atoms_kwargs)

        # The k-points are not passed to the constructor of the calculator,
        # they are set directly afterwards
        kpts = results.pop('kpts', None)
        calc = SinglePointDFTCalculator(atoms, **results)
        if kpts is not None:
            calc.kpts = kpts
        calc.name = 'vasp'
        atoms.calc = calc
        return atoms


class OutcarHeaderParser(HeaderParser):
    """Class for parsing the header of an OUTCAR.

    If no parsers are given, new instances of the parsers
    in "default_header_parsers" are used.
    The workdir, if given, is where we look for the constraints."""

    def __init__(self,
                 parsers: Sequence[VaspHeaderPropertyParser] = None,
                 workdir: Union[str, PurePath] = None):
        if not parsers:
            parsers = default_header_parsers.make_parsers()
        super().__init__(parsers)
        self.workdir = workdir

    @property
    def workdir(self):
        """The working directory as a Path object, or None if not set"""
        return self._workdir

    @workdir.setter
    def workdir(self, value):
        self._workdir = None if value is None else Path(value)

    def _build_symbols(self, results: _RESULT) -> Sequence[str]:
        """Construct the list of symbols, one for each atom, from the parsed
        results. The keys which are used get removed from the results.

        Raises a ParseError if a required key is missing, or if the number
        of ion types does not match the number of species."""
        if 'symbols' in results:
            # Safeguard, in case a different parser already
            # did this. Not currently available in a default parser
            return results.pop('symbols')

        # We need both of these in order to build the symbols
        for required_key in ('ion_types', 'species'):
            if required_key in results:
                continue
            raise ParseError(
                'Did not find required key "{}" in parsed header results.'.
                format(required_key))

        ion_types = results.pop('ion_types')
        species = results.pop('species')
        n_types, n_species = len(ion_types), len(species)
        if n_types != n_species:
            raise ParseError(
                ('Expected length of ion_types to be same as species, '
                 'but got ion_types={} and species={}').format(
                     n_types, n_species))

        # Repeat each symbol by the number of ions of that type
        return [sym for n, sym in zip(ion_types, species) for _ in range(n)]

    def _get_constraint(self):
        """Try and get the constraints from the POSCAR or CONTCAR
        since they aren't located in the OUTCAR, and thus we cannot construct an
        OUTCAR parser which does this.
        Returns None if no workdir is set.
        """
        if self.workdir is None:
            return None
        return read_constraints_from_file(self.workdir)

    def build(self, lines: _CHUNK) -> _RESULT:
        """Apply the header parsers, and build the header.

        The header contains the keys 'symbols', 'natoms' and 'constraint',
        as well as whatever remains of the parsed results."""
        results = self.parse(lines)

        # This removes the keys which were used to build the symbols
        symbols = self._build_symbols(results)
        natoms = len(symbols)
        constraint = self._get_constraint()

        # Remaining results from the parse goes into the header
        return dict(symbols=symbols,
                    natoms=natoms,
                    constraint=constraint,
                    **results)


class OUTCARChunk(ImageChunk):
    """Container class for a chunk of the OUTCAR which consists of a
    self-contained SCF step, i.e. an image. Also contains the header data.

    If no parser is given, a new OutcarChunkParser is used.
    """

    def __init__(self,
                 lines: _CHUNK,
                 header: _HEADER,
                 parser: ChunkParser = None):
        super().__init__()
        self.lines = lines
        self.header = header
        self.parser = parser if parser else OutcarChunkParser()

    def build(self):
        """Build the image of this chunk with the parser"""
        chunk_parser = self.parser
        # Ensure header is synchronized, the parser may be shared
        chunk_parser.header = self.header
        return chunk_parser.build(self.lines)


def build_header(fd: TextIO) -> _CHUNK:
    """Build a chunk containing the header data.

    Lines are read from the file up to, and including, the first line
    which contains 'Iteration'.
    Raises a ParseError if the file ends before such a line is found."""
    header_lines = []
    for current in fd:
        header_lines.append(current)
        if 'Iteration' not in current:
            continue
        # Start of SCF cycle, the header is complete
        return header_lines

    # We never found the SCF delimiter, so the OUTCAR must be incomplete
    raise ParseError('Incomplete OUTCAR')


def build_chunk(fd: TextIO) -> _CHUNK:
    """Build chunk which contains 1 complete atoms object.

    Lines are read from the file up to, and including, the line with
    the delimiter of the end of the ionic step, plus 4 more lines.
    If the file ends before that, the StopIteration from "next"
    is not caught here."""
    chunk = []
    found_delimiter = False
    while not found_delimiter:
        current = next(fd)
        chunk.append(current)
        found_delimiter = _OUTCAR_SCF_DELIM in current
    # Add 4 more lines to include energy
    for _ in range(4):
        chunk.append(next(fd))
    return chunk


def outcarchunks(fd: TextIO,
                 chunk_parser: ChunkParser = None,
                 header_parser: HeaderParser = None) -> Iterator[OUTCARChunk]:
    """Function to build chunks of OUTCAR from a file stream.

    The header is parsed first, and is shared between all of the chunks
    which are then yielded, until we run out of lines in the file.
    Note that the file must have a "name", as the constraints are looked
    for in the directory which contains the file."""
    # pass in the workdir from the fd, so we can try and get the constraints
    workdir = Path(fd.name).parent
    if not header_parser:
        header_parser = OutcarHeaderParser(workdir=workdir)

    # First we get header info
    header = header_parser.build(build_header(fd))
    assert isinstance(header, dict)

    if not chunk_parser:
        chunk_parser = OutcarChunkParser()

    while True:
        try:
            chunk_lines = build_chunk(fd)
        except StopIteration:
            # End of file
            return
        yield OUTCARChunk(chunk_lines, header, parser=chunk_parser)


# Create the default chunk parsers.
# These are the parser classes which OutcarChunkParser creates instances of,
# if it is not given any parsers explicitly.
default_chunk_parsers = DefaultParsersContainer(
    Cell,
    PositionsAndForces,
    Stress,
    Magmoms,
    Magmom,
    EFermi,
    Kpoints,
    Energy,
)

# Create the default header parsers.
# These are the parser classes which OutcarHeaderParser creates instances of,
# if it is not given any parsers explicitly.
default_header_parsers = DefaultParsersContainer(
    SpeciesTypes,
    IonsPerSpecies,
    Spinpol,
    KpointHeader,
)