1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
|
# -*- coding: utf-8 -*-
#
# Copyright (c) 2019, the cclib development team
#
# This file is part of cclib (http://cclib.github.io) and is distributed under
# the terms of the BSD 3-Clause License.
"""Classes and tools for storing and handling parsed data"""
import logging
from collections import namedtuple
import numpy
from cclib.method import Electrons
from cclib.method import orbitals
Attribute = namedtuple('Attribute', ['type', 'json_key', 'attribute_path'])
class ccData(object):
"""Stores data extracted by cclib parsers
Description of cclib attributes:
aonames -- atomic orbital names (list of strings)
aooverlaps -- atomic orbital overlap matrix (array[2])
atombasis -- indices of atomic orbitals on each atom (list of lists)
atomcharges -- atomic partial charges (dict of arrays[1])
atomcoords -- atom coordinates (array[3], angstroms)
atommasses -- atom masses (array[1], daltons)
atomnos -- atomic numbers (array[1])
atomspins -- atomic spin densities (dict of arrays[1])
ccenergies -- molecular energies with Coupled-Cluster corrections (array[2], eV)
charge -- net charge of the system (integer)
coreelectrons -- number of core electrons in atom pseudopotentials (array[1])
enthalpy -- sum of electronic and thermal enthalpies (float, hartree/particle)
entropy -- entropy (float, hartree/particle)
etenergies -- energies of electronic transitions (array[1], 1/cm)
etoscs -- oscillator strengths of electronic transitions (array[1])
etrotats -- rotatory strengths of electronic transitions (array[1], ??)
etsecs -- singly-excited configurations for electronic transitions (list of lists)
etsyms -- symmetries of electronic transitions (list of string)
freeenergy -- sum of electronic and thermal free energies (float, hartree/particle)
fonames -- fragment orbital names (list of strings)
fooverlaps -- fragment orbital overlap matrix (array[2])
fragnames -- names of fragments (list of strings)
frags -- indices of atoms in a fragment (list of lists)
gbasis -- coefficients and exponents of Gaussian basis functions (PyQuante format)
geotargets -- targets for convergence of geometry optimization (array[1])
geovalues -- current values for convergence of geometry optmization (array[1])
grads -- current values of forces (gradients) in geometry optimization (array[3])
hessian -- elements of the force constant matrix (array[1])
homos -- molecular orbital indices of HOMO(s) (array[1])
metadata -- various metadata about the package and computation (dict)
mocoeffs -- molecular orbital coefficients (list of arrays[2])
moenergies -- molecular orbital energies (list of arrays[1], eV)
moments -- molecular multipole moments (list of arrays[], a.u.)
mosyms -- orbital symmetries (list of lists)
mpenergies -- molecular electronic energies with Møller-Plesset corrections (array[2], eV)
mult -- multiplicity of the system (integer)
natom -- number of atoms (integer)
nbasis -- number of basis functions (integer)
nmo -- number of molecular orbitals (integer)
nocoeffs -- natural orbital coefficients (array[2])
nooccnos -- natural orbital occupation numbers (array[1])
nsocoeffs -- natural spin orbital coefficients (list of array[2])
nsooccnos -- natural spin orbital occupation numbers (list of array[1])
optdone -- flags whether an optimization has converged (Boolean)
optstatus -- optimization status for each set of atomic coordinates (array[1])
polarizabilities -- (dipole) polarizabilities, static or dynamic (list of arrays[2])
pressure -- temperature used for Thermochemistry (float, atm)
scancoords -- geometries of each scan step (array[3], angstroms)
scanenergies -- energies of potential energy surface (list)
scannames -- names of varaibles scanned (list of strings)
scanparm -- values of parameters in potential energy surface (list of tuples)
scfenergies -- molecular electronic energies after SCF (Hartree-Fock, DFT) (array[1], eV)
scftargets -- targets for convergence of the SCF (array[2])
scfvalues -- current values for convergence of the SCF (list of arrays[2])
temperature -- temperature used for Thermochemistry (float, kelvin)
time -- time in molecular dynamics and other trajectories (array[1], fs)
transprop -- all absorption and emission spectra (dictionary {name:(etenergies, etoscs)})
WARNING: this attribute is not standardized and is liable to change in cclib 2.0
vibanharms -- vibrational anharmonicity constants (array[2], 1/cm)
vibdisps -- cartesian displacement vectors (array[3], delta angstrom)
vibfreqs -- vibrational frequencies (array[1], 1/cm)
vibirs -- IR intensities (array[1], km/mol)
vibramans -- Raman intensities (array[1], A^4/Da)
vibsyms -- symmetries of vibrations (list of strings)
(1) The term 'array' refers to a numpy array
(2) The number of dimensions of an array is given in square brackets
(3) Python indexes arrays/lists starting at zero, so if homos==[10], then
the 11th molecular orbital is the HOMO
"""
# The expected types for all supported attributes.
# The json_key is the key name used for attributes in the CJSON/JSON format
# 'TBD' - To Be Decided are the key names of attributes which haven't been included in the cjson format
_attributes = {
"aonames": Attribute(list, 'names', 'atoms:orbitals'),
"aooverlaps": Attribute(numpy.ndarray, 'overlaps', 'properties:orbitals'),
"atombasis": Attribute(list, 'indices', 'atoms:orbitals'),
"atomcharges": Attribute(dict, 'partial charges', 'properties'),
"atomcoords": Attribute(numpy.ndarray, 'coords', 'atoms:coords:3d'),
"atommasses": Attribute(numpy.ndarray, 'mass', 'atoms'),
"atomnos": Attribute(numpy.ndarray, 'number', 'atoms:elements'),
"atomspins": Attribute(dict, 'spins', 'atoms'),
"ccenergies": Attribute(numpy.ndarray, 'coupled cluster', 'properties:energy'),
"charge": Attribute(int, 'charge', 'properties'),
"coreelectrons": Attribute(numpy.ndarray, 'core electrons', 'atoms'),
"enthalpy": Attribute(float, 'enthalpy', 'properties'),
"entropy": Attribute(float, 'entropy', 'properties'),
"etenergies": Attribute(numpy.ndarray, 'electronic transitions', 'transitions'),
"etoscs": Attribute(numpy.ndarray, 'oscillator strength', 'transitions'),
"etrotats": Attribute(numpy.ndarray, 'rotatory strength', 'transitions'),
"etsecs": Attribute(list, 'one excited config', 'transitions'),
"etsyms": Attribute(list, 'symmetry', 'transitions'),
"freeenergy": Attribute(float, 'free energy', 'properties:energy'),
"fonames": Attribute(list, 'orbital names', 'fragments'),
"fooverlaps": Attribute(numpy.ndarray, 'orbital overlap', 'fragments'),
"fragnames": Attribute(list, 'fragment names', 'fragments'),
"frags": Attribute(list, 'atom indices', 'fragments'),
"gbasis": Attribute(list, 'basis functions', 'atoms:orbitals'),
"geotargets": Attribute(numpy.ndarray, 'geometric targets', 'optimization'),
"geovalues": Attribute(numpy.ndarray, 'geometric values', 'optimization'),
"grads": Attribute(numpy.ndarray, 'TBD', 'N/A'),
"hessian": Attribute(numpy.ndarray, 'hessian matrix', 'vibrations'),
"homos": Attribute(numpy.ndarray, 'homos', 'properties:orbitals'),
"metadata": Attribute(dict, 'TBD', 'N/A'),
"mocoeffs": Attribute(list, 'coeffs', 'properties:orbitals'),
"moenergies": Attribute(list, 'energies', 'properties:orbitals'),
"moments": Attribute(list, 'total dipole moment', 'properties'),
"mosyms": Attribute(list, 'molecular orbital symmetry', 'properties:orbitals'),
"mpenergies": Attribute(numpy.ndarray, 'moller plesset', 'properties:energy'),
"mult": Attribute(int, 'multiplicity', 'properties'),
"natom": Attribute(int, 'number of atoms', 'properties'),
"nbasis": Attribute(int, 'basis number', 'properties:orbitals'),
"nmo": Attribute(int, 'MO number', 'properties:orbitals'),
"nocoeffs": Attribute(numpy.ndarray, 'TBD', 'N/A'),
"nooccnos": Attribute(numpy.ndarray, 'TBD', 'N/A'),
"nsocoeffs": Attribute(list, 'TBD', 'N/A'),
"nsooccnos": Attribute(list, 'TBD', 'N/A'),
"optdone": Attribute(list, 'done', 'optimization'),
"optstatus": Attribute(numpy.ndarray, 'status', 'optimization'),
"polarizabilities": Attribute(list, 'polarizabilities', 'N/A'),
"pressure": Attribute(float, 'pressure', 'properties'),
"scancoords": Attribute(numpy.ndarray, 'step geometry', 'optimization:scan'),
"scanenergies": Attribute(list, 'PES energies', 'optimization:scan'),
"scannames": Attribute(list, 'variable names', 'optimization:scan'),
"scanparm": Attribute(list, 'PES parameter values', 'optimization:scan'),
"scfenergies": Attribute(numpy.ndarray, 'scf energies', 'optimization:scf'),
"scftargets": Attribute(numpy.ndarray, 'targets', 'optimization:scf'),
"scfvalues": Attribute(list, 'values', 'optimization:scf'),
"temperature": Attribute(float, 'temperature', 'properties'),
"time": Attribute(numpy.ndarray, 'time', 'N/A'),
"transprop": Attribute(dict, 'electronic transitions', 'transitions'),
"vibanharms": Attribute(numpy.ndarray, 'anharmonicity constants', 'vibrations'),
"vibdisps": Attribute(numpy.ndarray, 'displacement', 'vibrations'),
"vibfreqs": Attribute(numpy.ndarray, 'frequencies', 'vibrations'),
"vibirs": Attribute(numpy.ndarray, 'IR', 'vibrations:intensities'),
"vibramans": Attribute(numpy.ndarray, 'raman', 'vibrations:intensities'),
"vibsyms": Attribute(list, 'vibration symmetry', 'vibrations')
}
# The name of all attributes can be generated from the dictionary above.
_attrlist = sorted(_attributes.keys())
# Arrays are double precision by default, but these will be integer arrays.
_intarrays = ['atomnos', 'coreelectrons', 'homos', 'optstatus']
# Attributes that should be lists of arrays (double precision).
_listsofarrays = ['mocoeffs', 'moenergies', 'moments', 'polarizabilities', 'scfvalues']
# Attributes that should be dictionaries of arrays (double precision).
_dictsofarrays = ["atomcharges", "atomspins"]
# Possible statuses for optimization steps.
# OPT_UNKNOWN is the default and means optimization is in progress.
# OPT_NEW is set for every new optimization (e.g. PES, IRCs, etc.)
# OPT_DONE is set for the last step of an optimisation that converged.
# OPT_UNCONVERGED is set for every unconverged step (e.g. should be mutually exclusive with OPT_DONE)
# bit value notation allows coding for multiple states: OPT_NEW and OPT_UNCONVERGED or OPT_NEW and OPT_DONE.
OPT_UNKNOWN = 0b000
OPT_NEW = 0b001
OPT_UNCONVERGED = 0b010
OPT_DONE = 0b100
def __init__(self, attributes={}):
"""Initialize the cclibData object.
Normally called in the parse() method of a Logfile subclass.
Inputs:
attributes - optional dictionary of attributes to load as data
"""
if attributes:
self.setattributes(attributes)
def listify(self):
"""Converts all attributes that are arrays or lists/dicts of arrays to lists."""
attrlist = [k for k in self._attrlist if hasattr(self, k)]
for k in attrlist:
v = self._attributes[k].type
if v == numpy.ndarray:
setattr(self, k, getattr(self, k).tolist())
elif v == list and k in self._listsofarrays:
setattr(self, k, [x.tolist() for x in getattr(self, k)])
elif v == dict and k in self._dictsofarrays:
items = getattr(self, k).items()
pairs = [(key, val.tolist()) for key, val in items]
setattr(self, k, dict(pairs))
def arrayify(self):
"""Converts appropriate attributes to arrays or lists/dicts of arrays."""
attrlist = [k for k in self._attrlist if hasattr(self, k)]
for k in attrlist:
v = self._attributes[k].type
precision = 'd'
if k in self._intarrays:
precision = 'i'
if v == numpy.ndarray:
setattr(self, k, numpy.array(getattr(self, k), precision))
elif v == list and k in self._listsofarrays:
setattr(self, k, [numpy.array(x, precision) for x in getattr(self, k)])
elif v == dict and k in self._dictsofarrays:
items = getattr(self, k).items()
pairs = [(key, numpy.array(val, precision)) for key, val in items]
setattr(self, k, dict(pairs))
def getattributes(self, tolists=False):
"""Returns a dictionary of existing data attributes.
Inputs:
tolists - flag to convert attributes to lists where applicable
"""
if tolists:
self.listify()
attributes = {}
for attr in self._attrlist:
if hasattr(self, attr):
attributes[attr] = getattr(self, attr)
if tolists:
self.arrayify()
return attributes
def setattributes(self, attributes):
"""Sets data attributes given in a dictionary.
Inputs:
attributes - dictionary of attributes to set
Outputs:
invalid - list of attributes names that were not set, which
means they are not specified in self._attrlist
"""
if type(attributes) is not dict:
raise TypeError("attributes must be in a dictionary")
valid = [a for a in attributes if a in self._attrlist]
invalid = [a for a in attributes if a not in self._attrlist]
for attr in valid:
setattr(self, attr, attributes[attr])
self.arrayify()
self.typecheck()
return invalid
def typecheck(self):
"""Check the types of all attributes.
If an attribute does not match the expected type, then attempt to
convert; if that fails, only then raise a TypeError.
"""
self.arrayify()
for attr in [a for a in self._attrlist if hasattr(self, a)]:
val = getattr(self, attr)
if type(val) == self._attributes[attr].type:
continue
try:
val = self._attributes[attr].type(val)
except ValueError:
args = (attr, type(val), self._attributes[attr].type)
raise TypeError("attribute %s is %s instead of %s and could not be converted" % args)
def check_values(self, logger=logging):
"""Perform custom checks on the values of attributes."""
if hasattr(self, "etenergies") and any(e < 0 for e in self.etenergies):
negative_values = [e for e in self.etenergies if e < 0]
msg = ("At least one excitation energy is negative. "
"\nNegative values: %s\nFull etenergies: %s"
% (negative_values, self.etenergies))
logger.error(msg)
def write(self, filename=None, indices=None, *args, **kwargs):
"""Write parsed attributes to a file.
Possible extensions:
.cjson or .json - output a chemical JSON file
.cml - output a chemical markup language (CML) file
.xyz - output a Cartesian XYZ file of the last coordinates available
"""
from cclib.io import ccwrite
outputstr = ccwrite(self, outputdest=filename, indices=indices,
*args, **kwargs)
return outputstr
def writejson(self, filename=None, indices=None):
"""Write parsed attributes to a JSON file."""
return self.write(filename=filename, indices=indices,
outputtype='cjson')
def writecml(self, filename=None, indices=None):
"""Write parsed attributes to a CML file."""
return self.write(filename=filename, indices=indices,
outputtype='cml')
def writexyz(self, filename=None, indices=None):
"""Write parsed attributes to an XML file."""
return self.write(filename=filename, indices=indices,
outputtype='xyz')
@property
def converged_geometries(self):
"""
Return all converged geometries.
An array containing only the converged geometries, e.g.:
- For PES or IRCs, return all geometries for which optstatus matches OPT_DONE
- The converged geometry for simple optimisations
- The input geometry for single points
"""
if hasattr(self, 'optstatus'):
converged_indexes = [x for x, y in enumerate(self.optstatus) if y & self.OPT_DONE > 0]
return self.atomcoords[converged_indexes]
else:
return self.atomcoords
@property
def new_geometries(self):
"""
Return all starting geometries.
An array containing only the starting geometries, e.g.:
- For PES or IRCs, return all geometries for which optstatus matches OPT_NEW
- The input geometry for simple optimisations or single points
"""
if hasattr(self, 'optstatus'):
new_indexes = [x for x, y in enumerate(self.optstatus) if y & self.OPT_NEW > 0]
return self.atomcoords[new_indexes]
else:
return self.atomcoords
@property
def unknown_geometries(self):
"""
Return all OPT_UNKNOWN geometries.
An array containing only the starting geometries, e.g.:
- For PES or IRCs, return all geometries for which optstatus matches OPT_UNKNOWN
- The input geometry for simple optimisations or single points
"""
if hasattr(self, 'optstatus'):
unknown_indexes = [x for x, y in enumerate(self.optstatus) if y == self.OPT_UNKNOWN]
return self.atomcoords[unknown_indexes]
else:
return self.atomcoords
@property
def unconverged_geometries(self):
"""
Return all unconverged geometries.
An array containing only the starting geometries, e.g.:
- For PES or IRCs, return all geometries for which optstatus matches OPT_UNCONVERGED
- The input geometry for simple optimisations or single points
"""
if hasattr(self, 'optstatus'):
unconverged_indexes = [x for x, y in enumerate(self.optstatus) if y & self.OPT_UNCONVERGED > 0]
return self.atomcoords[unconverged_indexes]
else:
return self.atomcoords
@property
def nelectrons(self):
return Electrons(self).count()
@property
def closed_shell(self):
return orbitals.Orbitals(self).closed_shell()
class ccData_optdone_bool(ccData):
"""This is the version of ccData where optdone is a Boolean."""
def __init__(self, *args, **kwargs):
super(ccData_optdone_bool, self).__init__(*args, **kwargs)
self._attributes["optdone"] = Attribute(bool, 'done', 'optimization')
def setattributes(self, *args, **kwargs):
invalid = super(ccData_optdone_bool, self).setattributes(*args, **kwargs)
# Reduce optdone to a Boolean, because it will be parsed as a list. If this list has any element,
# it means that there was an optimized structure and optdone should be True.
if hasattr(self, 'optdone'):
self.optdone = len(self.optdone) > 0
|