1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
|
"""This module contains functions to read from QBox output files"""
from ase import Atom, Atoms
from ase.calculators.singlepoint import SinglePointCalculator
from ase.utils import reader
import re
import xml.etree.ElementTree as ET
# Compile regexs for fixing XML
re_find_bad_xml = re.compile(r'<(/?)([A-z]+) expectation ([a-z]+)')
@reader
def read_qbox(f, index=-1):
"""Read data from QBox output file
Inputs:
f - str or fileobj, path to file or file object to read from
index - int or slice, which frames to return
Returns:
list of Atoms or atoms, requested frame(s)
"""
# Check whether this is a QB@all output
version = None
for line in f:
if '<release>' in line:
version = ET.fromstring(line)
break
if version is None:
raise Exception('Parse Error: Version not found')
is_qball = 'qb@LL' in version.text or 'qball' in version.text
# Load in atomic species
species = dict()
if is_qball:
# Read all of the lines between release and the first call to `run`
species_data = []
for line in f:
if '<run' in line:
break
species_data.append(line)
species_data = '\n'.join(species_data)
# Read out the species information with regular expressions
symbols = re.findall('symbol_ = ([A-Z][a-z]?)', species_data)
masses = re.findall('mass_ = ([0-9.]+)', species_data)
names = re.findall('name_ = ([a-z]+)', species_data)
numbers = re.findall('atomic_number_ = ([0-9]+)', species_data)
# Compile them into a dictionary
for name, symbol, mass, number in zip(names, symbols, masses, numbers):
spec_data = dict(
symbol=symbol,
mass=float(mass),
number=float(number)
)
species[name] = spec_data
else:
# Find all species
species_blocks = _find_blocks(f, 'species', '<cmd>run')
for spec in species_blocks:
name = spec.get('name')
spec_data = dict(
symbol=spec.find('symbol').text,
mass=float(spec.find('mass').text),
number=int(spec.find('atomic_number').text))
species[name] = spec_data
# Find all of the frames
frames = _find_blocks(f, 'iteration', None)
# If index is an int, return one frame
if isinstance(index, int):
return _parse_frame(frames[index], species)
else:
return [_parse_frame(frame, species) for frame in frames[index]]
def _find_blocks(fp, tag, stopwords='[qbox]'):
"""Find and parse a certain block of the file.
Reads a file sequentially and stops when it either encounters the
end of the file, or until the it encounters a line that contains a
user-defined string *after it has already found at least one
desired block*. Use the stopwords ``[qbox]`` to read until the next
command is issued.
Groups the text between the first line that contains <tag> and the
next line that contains </tag>, inclusively. The function then
parses the XML and returns the Element object.
Inputs:
fp - file-like object, file to be read from
tag - str, tag to search for (e.g., 'iteration').
`None` if you want to read until the end of the file
stopwords - str, halt parsing if a line containing this string
is encountered
Returns:
list of xml.ElementTree, parsed XML blocks found by this class
"""
start_tag = '<%s' % tag
end_tag = '</%s>' % tag
blocks = [] # Stores all blocks
cur_block = [] # Block being filled
in_block = False # Whether we are currently parsing
for line in fp:
# Check if the block has started
if start_tag in line:
if in_block:
raise Exception('Parsing failed: Encountered nested block')
else:
in_block = True
# Add data to block
if in_block:
cur_block.append(line)
# Check for stopping conditions
if stopwords is not None:
if stopwords in line and len(blocks) > 0:
break
if end_tag in line:
if in_block:
blocks.append(cur_block)
cur_block = []
in_block = False
else:
raise Exception('Parsing failed: End tag found before start '
'tag')
# Join strings in a block into a single string
blocks = [''.join(b) for b in blocks]
# Ensure XML compatibility. There are two specific tags in QBall that are
# not valid XML, so we need to run a
blocks = [re_find_bad_xml.sub(r'<\1\2_expectation_\3', b) for b in blocks]
# Parse the blocks
return [ET.fromstring(b) for b in blocks]
def _parse_frame(tree, species):
"""Parse a certain frame from QBOX output
Inputs:
tree - ElementTree, <iteration> block from output file
species - dict, data about species. Key is name of atom type,
value is data about that type
Return:
Atoms object describing this iteration"""
# Load in data about the system
energy = float(tree.find("etotal").text)
# Load in data about the cell
unitcell = tree.find('atomset').find('unit_cell')
cell = []
for d in ['a', 'b', 'c']:
cell.append([float(x) for x in unitcell.get(d).split()])
stress_tree = tree.find('stress_tensor')
if stress_tree is None:
stresses = None
else:
stresses = [float(stress_tree.find('sigma_%s' % x).text)
for x in ['xx', 'yy', 'zz', 'yz', 'xz', 'xy']]
# Create the Atoms object
atoms = Atoms(pbc=True, cell=cell)
# Load in the atom information
forces = []
for atom in tree.find('atomset').findall('atom'):
# Load data about the atom type
spec = atom.get('species')
symbol = species[spec]['symbol']
mass = species[spec]['mass']
# Get data about position / velocity / force
pos = [float(x) for x in atom.find('position').text.split()]
force = [float(x) for x in atom.find('force').text.split()]
momentum = [float(x) * mass
for x in atom.find('velocity').text.split()]
# Create the objects
atom = Atom(symbol=symbol, mass=mass, position=pos, momentum=momentum)
atoms += atom
forces.append(force)
# Create the calculator object that holds energy/forces
calc = SinglePointCalculator(atoms,
energy=energy, forces=forces, stress=stresses)
atoms.calc = calc
return atoms
|