1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
|
# Portable Python script for numerical output file comparisons of MOPAC
# Argument list: <output file #1> <output file #2>
from shutil import copyfile
from sys import argv
import subprocess
import os
import re
import numpy as np
# MOPAC testing has historically been based on checking that the output files corresponding
# to a test set of input files remain consistent with the output of past MOPAC versions.
# This Python script automates such a test, assuming that we allow for small deviations
# between numerical outputs (with a uniform threshold for simplicity).
# All version/system-dependent output (timing & version info) is ignored.
# Eigenvectors are only stable in the sense of distance between degenerate subspaces,
# which requires more careful identification and analysis of eigenvector blocks.
# We cannot test the stability of all eigenvector information (if we cannot guarantee
# completeness of a degenerate subspace), and untestable data is ignored.
# In principle, we could interpret the symmetry labels to use some of the edge data when
# we can independently determine the size of the subspace, but this is way more trouble than it is worth.
# This comparison is insensitive to differences in whitespace and number of empty lines.
# Some input files used for testing contain reference data in comments, which are ignored here.
# More fine-grained numerical tests using PyTest are planned after development of a Python interface for MOPAC,
# which will make it easier to assign different numerical tolerances to different quantities
NUMERIC_THRESHOLD = 1e-4 # large because of numerical errors in unoccupied orbital energies, energy gradients, & relaxed geometries
HEAT_THRESHOLD = 1e-4
DEGENERACY_THRESHOLD = 1e-3
EIGVEC_THRESHOLD = 1e-3
# regular expression pattern for a time stamp or other signifier of timing output, "CLOCK" or "TIME" or "SECONDS", & system-dependent versioning
skip_criteria = re.compile(r'([A-Z][a-z][a-z] [A-Z][a-z][a-z] [ 0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9])'
r'|(CLOCK)|(TIME)|(SECONDS)|(Version)|(THE VIBRATIONAL FREQUENCY)|(ITERATION)|(SCF CALCULATIONS)|(Stewart)'
r'|(remaining)|(\* THREADS)|(\* ISOTOPE)|(\* DENOUT)|(\* OLDENS)|(\* SETUP)|(ITER.)|(\*\* )|(web-site)|(MOPAC)|(GRADIENT NORM)')
# regular expression pattern for an eigenvector block
eigen_criteria = re.compile(r'(Root No.)|(ROOT NO.)')
def is_float(string):
'''check if a string contains a float'''
try:
float(string.replace('D','E'))
return True
except ValueError:
return False
def to_float(string):
'''check if a string contains a float'''
try:
return float(string.replace('D','E'))
except ValueError:
return False
def parse_mopac_out_file(path):
'''parse a MOPAC output file at a given path into a list of basic elements (strings, numbers, matrices)'''
parse_line = []
parse_list = []
mode = 'standard'
with open(path,'r') as file:
for line_num, line in enumerate(file):
# this hack separates floats that don't have a space between them because of a minus sign & trailing comma's
word_list = line.replace('-',' -').replace('E -','E-').replace('D -','D-').replace('=',' = ').replace(',',' , ').split()
# ignore molecular dimensions block
if 'MOLECULAR DIMENSIONS (Angstroms)' in line:
mode = 'dim'
continue
elif mode == 'dim':
if 'SCF CALCULATIONS' in line:
mode = 'standard'
else:
continue
# switch to or continue iter mode
if 'RHF CALCULATION' in line or 'UHF CALCULATION' in line or 'Geometry optimization using BFGS' in line:
mode = 'iter'
continue
elif mode == 'iter':
if 'SCF FIELD WAS ACHIEVED' in line or 'THERE IS NOT ENOUGH TIME FOR ANOTHER CYCLE' in line:
mode = 'standard'
else:
continue
# skip lines as necessary
if skip_criteria.search(line):
continue
# switch to or continue geo mode
if 'ATOM CHEMICAL BOND LENGTH BOND ANGLE TWIST ANGLE' in line:
mode = 'geo'
elif mode == 'geo':
if len(word_list) == 0:
mode = 'standard'
else:
continue
# switch to or continue lmo mode
if 'NUMBER OF CENTERS LMO ENERGY COMPOSITION OF ORBITALS' in line:
mode = 'lmo'
elif mode == 'lmo':
if 'LOCALIZED ORBITALS' in line:
mode = 'standard'
else:
continue
# switch to or continue grad mode
if 'LARGEST ATOMIC GRADIENTS' in line:
mode = 'grad'
blank_count = 0
# simple-minded skipping based on counting blank lines
elif mode == 'grad':
if len(word_list) == 0:
blank_count += 1
if blank_count == 3:
mode = 'standard'
else:
continue
# switch to or continue vibe mode
if 'DESCRIPTION OF VIBRATIONS' in line:
mode = 'vibe'
elif mode == 'vibe':
if 'FORCE CONSTANT IN INTERNAL COORDINATES' in line or 'SYMMETRY NUMBER FOR POINT-GROUP' in line:
mode = 'standard'
else:
continue
# switch to or continue eigen mode
if eigen_criteria.search(line):
if mode != 'eigen':
eigen_line_num = line_num+1
mode = 'eigen'
label_list = []
value_list = []
vector_list = []
num_eigen = []
label_list += [ int(word) for word in word_list[2:] ]
num_eigen.append(len(word_list) - 2)
# eigen parsing
elif mode == 'eigen':
# save eigenvalues in a list
if len(word_list) == num_eigen[-1] and len(value_list) < len(label_list):
# check if the list of numbers is just another label
label_check = True
try:
for word,label in zip(word_list,label_list[-len(word_list):]):
if int(word) != label:
label_check = False
except ValueError:
label_check = False
if label_check == False:
value_list += [ float(word) for word in word_list ]
# ignore symmetry labels
elif len(word_list) == 2*num_eigen[-1] and is_float(word_list[-2]) and not is_float(word_list[-1]):
pass
# save eigenvectors in a matrix
elif len(word_list) > num_eigen[-1] and all([is_float(word) for word in word_list[-num_eigen[-1]:]]):
vector_list += [ float(word) for word in word_list[-num_eigen[-1]:] ]
# ignore blank lines
elif len(word_list) == 0:
pass
# switch back to standard mode & reformat eigenvectors
else:
mode = 'standard'
# reshape into a matrix
nrow = len(vector_list) // len(label_list)
ncol = len(label_list)
eigenmatrix = np.empty((nrow,ncol))
offset = 0
for num in num_eigen:
eigenmatrix[:,offset:offset+num] = np.reshape(vector_list[offset*nrow:(offset+num)*nrow],(nrow,num),order='C')
offset += num
# renormalize the eigenvectors (MOPAC uses a variety of normalizations)
for col in eigenmatrix.T:
col /= np.linalg.norm(col)
# output eigenvalue (if known) and eigenvectors
if len(value_list) == len(label_list):
parse_list.append((value_list,eigenmatrix,label_list[0] == 1,label_list[-1] == nrow))
else:
parse_list.append((label_list,eigenmatrix,label_list[0] == 1,label_list[-1] == nrow))
parse_line.append(eigen_line_num)
# standard parsing
if mode == 'standard':
for word in word_list:
if is_float(word):
if 'FINAL HEAT OF FORMATION =' in line and word is word_list[5]:
parse_list.append(('HOF',to_float(word)))
else:
parse_list.append(to_float(word))
else:
parse_list.append(word)
parse_line.append(line_num+1)
return parse_line, parse_list
def compare_mopac_out_file(out_line, out_list, ref_line, ref_list, heat_error_threshold):
'''Compares the output to the given reference'''
if len(ref_list) != len(out_list):
dump = open("ref.dump", "w")
for item in ref_list:
dump.write(f"{item}\n")
dump.close()
dump = open("out.dump", "w")
for item in out_list:
dump.write(f"{item}\n")
dump.close()
assert len(ref_list) == len(out_list), f'ERROR: output file size mismatch, {len(ref_list)} vs. {len(out_list)}'
#print(f'WARNING: output file size mismatch, {len(ref_list)} vs. {len(out_list)}')
for (out_line0, out, ref_line0, ref) in zip(out_line, out_list, ref_line, ref_list):
# print(ref, "vs.", out)
# check that types match
assert type(ref) == type(out), f'ERROR: type mismatch between {ref} on reference line {ref_line0} in {argv[1]} and {out} on output line {out_line0} in {argv[2]}'
# compare strings
if type(ref) is str:
assert ref == out, f'ERROR: string mismatch between {ref} on reference line {ref_line0} in {argv[1]} and {out} on output line {out_line0} in {argv[2]}'
# compare floats
elif type(ref) is float:
# assert abs(ref - out) < NUMERIC_THRESHOLD, f'ERROR: numerical mismatch between {ref} and {out} on output line {line}'
if abs(ref - out) > NUMERIC_THRESHOLD:
print(f'WARNING: numerical mismatch between {ref} on reference line {ref_line0} in {argv[1]} and {out} on output line {out_line0} in {argv[2]}')
# compare heats of formation
elif len(ref) == 2:
assert abs(ref[1] - out[1]) < heat_error_threshold, f'ERROR: numerical heat mismatch between {ref[1]} on reference line {ref_line0} and {out[1]} on output line {out_line0}'
if abs(ref[1] - out[1]) > HEAT_THRESHOLD:
print(f'WARNING: numerical heat mismatch between {ref[1]} on reference line {ref_line0} in {argv[1]} and {out[1]} on output line {out_line0} in {argv[2]}')
# compare eigenvalues & eigenvectors
elif len(ref) == 4:
ref_val, ref_vec, ref_begin, ref_end = ref
out_val, out_vec, ref_begin, ref_end = out
for refv, outv in zip(ref_val,out_val):
# assert abs(refv - outv) < NUMERIC_THRESHOLD, f'ERROR: numerical mismatch between {refv} and {outv} on output line {line}'
if abs(refv - outv) > NUMERIC_THRESHOLD:
print(f'WARNING: eigenvalue mismatch between {refv} on reference line {ref_line0} in {argv[1]} and {outv} on output line {out_line0} in {argv[2]}')
# build list of edges denoting degenerate subspaces
if ref_begin:
edge_list = [0]
else:
edge_list = []
edge_list += [ i+1 for i in range(len(ref_val)-1) if np.abs(ref_val[i] - ref_val[i+1]) > DEGENERACY_THRESHOLD ]
if ref_end:
edge_list += [len(ref_val)]
# test the distance between each pair of degenerate subspaces
for i in range(len(edge_list)-1):
overlap = ref_vec[:,edge_list[i]:edge_list[i+1]].T @ out_vec[:,edge_list[i]:edge_list[i+1]]
# print("overlap = ",overlap)
sval = np.linalg.svd(overlap, compute_uv=False)
assert (sval[0] < 1.0 + EIGVEC_THRESHOLD) and (sval[-1] > 1.0 - EIGVEC_THRESHOLD), \
f'ERROR: degenerate subspace mismatch between reference line {ref_line0} and output line {out_line0}, overlap range in [{min(sval)},{max(sval)}]'
# stub main for command-line comparisons
if __name__ == "__main__":
# parse the 2 output files that we are comparing
ref_line, ref_list = parse_mopac_out_file(argv[1])
out_line, out_list = parse_mopac_out_file(argv[2])
# Run the comparison
compare_mopac_out_file(out_line, out_list, ref_line, ref_list)
|