File: orca.py

package info (click to toggle)
python-ase 3.26.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 15,484 kB
  • sloc: python: 148,112; xml: 2,728; makefile: 110; javascript: 47
file content (347 lines) | stat: -rw-r--r-- 11,150 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
import re
import warnings
from collections.abc import Iterable, Iterator
from io import StringIO
from pathlib import Path
from typing import List, Optional, Sequence

import numpy as np

from ase import Atoms
from ase.calculators.calculator import PropertyNotImplementedError
from ase.calculators.singlepoint import SinglePointDFTCalculator
from ase.io import ParseError, read
from ase.units import Bohr, Hartree
from ase.utils import deprecated, reader, writer


@reader
def read_geom_orcainp(fd):
    """Method to read geometry from an ORCA input file."""
    lines = fd.readlines()

    # Find geometry region of input file.
    stopline = 0
    for index, line in enumerate(lines):
        if line[1:].startswith('xyz '):
            startline = index + 1
            stopline = -1
        elif line.startswith('end') and stopline == -1:
            stopline = index
        elif line.startswith('*') and stopline == -1:
            stopline = index
    # Format and send to read_xyz.
    xyz_text = '%i\n' % (stopline - startline)
    xyz_text += ' geometry\n'
    for line in lines[startline:stopline]:
        xyz_text += line
    atoms = read(StringIO(xyz_text), format='xyz')
    atoms.set_cell((0.0, 0.0, 0.0))  # no unit cell defined

    return atoms


@writer
def write_orca(fd, atoms, params):
    # conventional filename: '<name>.inp'
    fd.write(f'! {params["orcasimpleinput"]} \n')
    fd.write(f'{params["orcablocks"]} \n')

    if 'coords' not in params['orcablocks']:
        fd.write('*xyz')
        fd.write(' %d' % params['charge'])
        fd.write(' %d \n' % params['mult'])
        for atom in atoms:
            if atom.tag == 71:  # 71 is ascii G (Ghost)
                symbol = atom.symbol + ' : '
            else:
                symbol = atom.symbol + '   '
            fd.write(
                symbol
                + str(atom.position[0])
                + ' '
                + str(atom.position[1])
                + ' '
                + str(atom.position[2])
                + '\n'
            )
        fd.write('*\n')


def read_charge(lines: List[str]) -> Optional[float]:
    """Read sum of atomic charges."""
    charge = None
    for line in lines:
        if 'Sum of atomic charges' in line:
            charge = float(line.split()[-1])
    return charge


def read_energy(lines: List[str]) -> Optional[float]:
    """Read energy."""
    energy = None
    for line in lines:
        if 'FINAL SINGLE POINT ENERGY' in line:
            if 'Wavefunction not fully converged' in line:
                energy = float('nan')
            else:
                energy = float(line.split()[-1])
    if energy is not None:
        return energy * Hartree
    return energy


def read_center_of_mass(lines: List[str]) -> Optional[np.ndarray]:
    """Scan through text for the center of mass"""
    # Example:
    # 'The origin for moment calculation is the CENTER OF MASS  =
    # ( 0.002150, -0.296255  0.086315)'
    # Note the missing comma in the output
    com = None
    for line in lines:
        if 'The origin for moment calculation is the CENTER OF MASS' in line:
            line = re.sub(r'[(),]', '', line)
            com = np.array([float(_) for _ in line.split()[-3:]])
    if com is not None:
        return com * Bohr  # return the last match
    return com


def read_dipole(lines: List[str]) -> Optional[np.ndarray]:
    """Read dipole moment.

    Note that the read dipole moment is for the COM frame of reference.
    """
    dipole = None
    for line in lines:
        if 'Total Dipole Moment' in line:
            dipole = np.array([float(x) for x in line.split()[-3:]]) * Bohr
    return dipole  # Return the last match


def _read_atoms(lines: Sequence[str]) -> Atoms:
    """Read atomic positions and symbols. Create Atoms object."""
    line_start = -1
    natoms = 0

    for ll, line in enumerate(lines):
        if 'Number of atoms' in line:
            natoms = int(line.split()[4])
        elif 'CARTESIAN COORDINATES (ANGSTROEM)' in line:
            line_start = ll + 2

    # Check if atoms present and if their number is given.
    if line_start == -1:
        raise ParseError(
            'No information about the structure in the ORCA output file.'
        )
    elif natoms == 0:
        raise ParseError(
            'No information about number of atoms in the ORCA output file.'
        )

    positions = np.zeros((natoms, 3))
    symbols = [''] * natoms

    for ll, line in enumerate(lines[line_start : (line_start + natoms)]):
        inp = line.split()
        positions[ll, :] = [float(pos) for pos in inp[1:4]]
        symbols[ll] = inp[0]

    atoms = Atoms(symbols=symbols, positions=positions)
    atoms.set_pbc([False, False, False])

    return atoms


def read_forces(lines: List[str]) -> Optional[np.ndarray]:
    """Read forces from output file if available. Else return None.

    Taking the forces from the output files (instead of the engrad-file) to
    be more general. The forces can be present in general output even if
    the engrad file is not there.

    Note: If more than one geometry relaxation step is available,
          forces do not always exist for the first step. In this case, for
          the first step an array of None will be returned. The following
          relaxation steps will then have forces available.
    """
    line_start = -1
    natoms = 0
    record_gradient = True

    for ll, line in enumerate(lines):
        if 'Number of atoms' in line:
            natoms = int(line.split()[4])
        # Read in only first set of forces for each chunk
        # (Excited state calculations can have several sets of
        # forces per chunk)
        elif 'CARTESIAN GRADIENT' in line and record_gradient:
            line_start = ll + 3
            record_gradient = False

    # Check if number of atoms is available.
    if natoms == 0:
        raise ParseError(
            'No information about number of atoms in the ORCA output file.'
        )

    # Forces are not always available. If not available, return None.
    if line_start == -1:
        return None

    forces = np.zeros((natoms, 3))

    for ll, line in enumerate(lines[line_start : (line_start + natoms)]):
        inp = line.split()
        forces[ll, :] = [float(pos) for pos in inp[3:6]]

    forces *= -Hartree / Bohr
    return forces


def get_chunks(lines: Iterable[str]) -> Iterator[list[str]]:
    """Separate out the chunks for each geometry relaxation step."""
    finished = False
    relaxation_finished = False
    relaxation = False

    chunk_endings = [
        'ORCA TERMINATED NORMALLY',
        'ORCA GEOMETRY RELAXATION STEP',
    ]
    chunk_lines = []
    for line in lines:
        # Assemble chunks
        if any([ending in line for ending in chunk_endings]):
            chunk_lines.append(line)
            yield chunk_lines
            chunk_lines = []
        else:
            chunk_lines.append(line)

        if 'ORCA TERMINATED NORMALLY' in line:
            finished = True

        if 'THE OPTIMIZATION HAS CONVERGED' in line:
            relaxation_finished = True

        # Check if calculation is an optimization.
        if 'ORCA SCF GRADIENT CALCULATION' in line:
            relaxation = True

    # Give error if calculation not finished for single-point calculations.
    if not finished and not relaxation:
        raise ParseError('Error: Calculation did not finish!')
    # Give warning if calculation not finished for geometry optimizations.
    elif not finished and relaxation:
        warnings.warn('Calculation did not finish!')
    # Calculation may have finished, but relaxation may have not.
    elif not relaxation_finished and relaxation:
        warnings.warn('Geometry optimization did not converge!')


@reader
def read_orca_output(fd, index=slice(None)):
    """From the ORCA output file: Read Energy, positions, forces
       and dipole moment.

    Create separated atoms object for each geometry frame through
    parsing the output file in chunks.
    """
    images = []

    # Iterate over chunks and create a separate atoms object for each
    for chunk in get_chunks(fd):
        energy = read_energy(chunk)
        atoms = _read_atoms(chunk)
        forces = read_forces(chunk)
        dipole = read_dipole(chunk)
        charge = read_charge(chunk)
        com = read_center_of_mass(chunk)

        # Correct dipole moment for centre-of-mass
        if com is not None and dipole is not None:
            dipole = dipole + com * charge

        atoms.calc = SinglePointDFTCalculator(
            atoms,
            energy=energy,
            free_energy=energy,
            forces=forces,
            # stress=self.stress,
            # stresses=self.stresses,
            # magmom=self.magmom,
            dipole=dipole,
            # dielectric_tensor=self.dielectric_tensor,
            # polarization=self.polarization,
        )
        # collect images
        images.append(atoms)

    return images[index]


@reader
def read_orca_engrad(fd):
    """Read Forces from ORCA .engrad file."""
    getgrad = False
    gradients = []
    tempgrad = []
    for _, line in enumerate(fd):
        if line.find('# The current gradient') >= 0:
            getgrad = True
            gradients = []
            tempgrad = []
            continue
        if getgrad and '#' not in line:
            grad = line.split()[-1]
            tempgrad.append(float(grad))
            if len(tempgrad) == 3:
                gradients.append(tempgrad)
                tempgrad = []
        if '# The at' in line:
            getgrad = False

    forces = -np.array(gradients) * Hartree / Bohr
    return forces


@deprecated(
    'Please use ase.io.read instead of read_orca_outputs, e.g.,\n'
    'from ase.io import read \n'
    'atoms = read("orca.out")',
    DeprecationWarning,
)
def read_orca_outputs(directory, stdout_path):
    """Reproduces old functionality of reading energy, forces etc
       directly from output without creation of atoms object.
       This is kept to ensure backwards compatability
    .. deprecated:: 3.24.0
       Use of read_orca_outputs is deprected, please
       process ORCA output by using ase.io.read
       e.g., read('orca.out')"
    """
    stdout_path = Path(stdout_path)
    atoms = read_orca_output(stdout_path, index=-1)
    results = {}
    results['energy'] = atoms.get_total_energy()
    results['free_energy'] = atoms.get_total_energy()

    try:
        results['dipole'] = atoms.get_dipole_moment()
    except PropertyNotImplementedError:
        pass

    # Does engrad always exist? - No!
    # Will there be other files -No -> We should just take engrad
    # as a direct argument.  Or maybe this function does not even need to
    # exist.
    engrad_path = stdout_path.with_suffix('.engrad')
    if engrad_path.is_file():
        results['forces'] = read_orca_engrad(engrad_path)
        print("""Warning: If you are reading in an engrad file from a
              geometry optimization, check very carefully.
              ORCA does not by default supply the forces for the
              converged geometry!""")
    return results