File: compare_output.py

package info (click to toggle)
mopac 23.1.2-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 115,604 kB
  • sloc: f90: 165,274; python: 202; sh: 181; ansic: 85; makefile: 8
file content (283 lines) | stat: -rw-r--r-- 13,244 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# Portable Python script for numerical output file comparisons of MOPAC
# Argument list: <output file #1> <output file #2>

from shutil import copyfile
from sys import argv
import subprocess
import os
import re
import numpy as np

# MOPAC testing has historically been based on checking that the output files corresponding
# to a test set of input files remain consistent with the output of past MOPAC versions.
# This Python script automates such a test, assuming that we allow for small deviations
# between numerical outputs (with a uniform threshold for simplicity).
# All version/system-dependent output (timing & version info) is ignored.
# Eigenvectors are only stable in the sense of distance between degenerate subspaces,
# which requires more careful identification and analysis of eigenvector blocks.
# We cannot test the stability of all eigenvector information (if we cannot guarantee
# completeness of a degenerate subspace), and untestable data is ignored.
# In principle, we could interpret the symmetry labels to use some of the edge data when
# we can independently determine the size of the subspace, but this is way more trouble than it is worth.
# This comparison is insensitive to differences in whitespace and number of empty lines.
# Some input files used for testing contain reference data in comments, which are ignored here.

# More fine-grained numerical tests using PyTest are planned after development of a Python interface for MOPAC,
# which will make it easier to assign different numerical tolerances to different quantities

NUMERIC_THRESHOLD = 1e-4 # large because of numerical errors in unoccupied orbital energies, energy gradients, & relaxed geometries
HEAT_THRESHOLD = 1e-4
DEGENERACY_THRESHOLD = 1e-3
EIGVEC_THRESHOLD = 1e-3

# regular expression pattern for a time stamp or other signifier of timing output, "CLOCK" or "TIME" or "SECONDS", & system-dependent versioning
skip_criteria = re.compile(r'([A-Z][a-z][a-z] [A-Z][a-z][a-z] [ 0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9])'
                           r'|(CLOCK)|(TIME)|(SECONDS)|(Version)|(THE VIBRATIONAL FREQUENCY)|(ITERATION)|(SCF CALCULATIONS)|(Stewart)'
                           r'|(remaining)|(\*  THREADS)|(\*  ISOTOPE)|(\*  DENOUT)|(\*  OLDENS)|(\*  SETUP)|(ITER.)|(\*\* )|(web-site)|(MOPAC)|(GRADIENT NORM)')

# regular expression pattern for an eigenvector block
eigen_criteria = re.compile(r'(Root No.)|(ROOT NO.)')

def is_float(string):
    '''check if a string contains a float'''
    try:
        float(string.replace('D','E'))
        return True
    except ValueError:
        return False

def to_float(string):
    '''check if a string contains a float'''
    try:
        return float(string.replace('D','E'))
    except ValueError:
        return False

def parse_mopac_out_file(path):
    '''parse a MOPAC output file at a given path into a list of basic elements (strings, numbers, matrices)'''
    parse_line = []
    parse_list = []
    mode = 'standard'
    with open(path,'r') as file:
        for line_num, line in enumerate(file):

            # this hack separates floats that don't have a space between them because of a minus sign & trailing comma's
            word_list = line.replace('-',' -').replace('E -','E-').replace('D -','D-').replace('=',' = ').replace(',',' , ').split()

            # ignore molecular dimensions block
            if 'MOLECULAR DIMENSIONS (Angstroms)' in line:
                mode = 'dim'
                continue
            elif mode == 'dim':
                if 'SCF CALCULATIONS' in line:
                    mode = 'standard'
                else:
                    continue

            # switch to or continue iter mode
            if 'RHF CALCULATION' in line or 'UHF CALCULATION' in line or 'Geometry optimization using BFGS' in line:
                mode = 'iter'
                continue
            elif mode == 'iter':
                if 'SCF FIELD WAS ACHIEVED' in line or 'THERE IS NOT ENOUGH TIME FOR ANOTHER CYCLE' in line:
                    mode = 'standard'
                else:
                    continue

            # skip lines as necessary
            if skip_criteria.search(line):
                continue

            # switch to or continue geo mode
            if 'ATOM    CHEMICAL      BOND LENGTH      BOND ANGLE     TWIST ANGLE' in line:
                mode = 'geo'
            elif mode == 'geo':
                if len(word_list) == 0:
                    mode = 'standard'
                else:
                    continue

            # switch to or continue lmo mode
            if 'NUMBER OF CENTERS  LMO ENERGY     COMPOSITION OF ORBITALS' in line:
                mode = 'lmo'
            elif mode == 'lmo':
                if 'LOCALIZED ORBITALS' in line:
                    mode = 'standard'
                else:
                    continue

            # switch to or continue grad mode
            if 'LARGEST ATOMIC GRADIENTS' in line:
                mode = 'grad'
                blank_count = 0
            # simple-minded skipping based on counting blank lines
            elif mode == 'grad':
                if len(word_list) == 0:
                    blank_count += 1
                if blank_count == 3:
                    mode = 'standard'
                else:
                    continue

            # switch to or continue vibe mode
            if 'DESCRIPTION OF VIBRATIONS' in line:
                mode = 'vibe'
            elif mode == 'vibe':
                if 'FORCE CONSTANT IN INTERNAL COORDINATES' in line or 'SYMMETRY NUMBER FOR POINT-GROUP' in line:
                    mode = 'standard'
                else:
                    continue

            # switch to or continue eigen mode
            if eigen_criteria.search(line):
                if mode != 'eigen':
                    eigen_line_num = line_num+1
                    mode = 'eigen'
                    label_list = []
                    value_list = []
                    vector_list = []
                    num_eigen = []
                label_list += [ int(word) for word in word_list[2:] ]
                num_eigen.append(len(word_list) - 2)

            # eigen parsing
            elif mode == 'eigen':

                # save eigenvalues in a list
                if len(word_list) == num_eigen[-1] and len(value_list) < len(label_list):

                    # check if the list of numbers is just another label
                    label_check = True
                    try:
                        for word,label in zip(word_list,label_list[-len(word_list):]):
                            if int(word) != label:
                                label_check = False
                    except ValueError:
                        label_check = False

                    if label_check == False:
                        value_list += [ float(word) for word in word_list ]

                # ignore symmetry labels
                elif len(word_list) == 2*num_eigen[-1] and is_float(word_list[-2]) and not is_float(word_list[-1]):
                    pass

                # save eigenvectors in a matrix
                elif len(word_list) > num_eigen[-1] and all([is_float(word) for word in word_list[-num_eigen[-1]:]]):
                    vector_list += [ float(word) for word in word_list[-num_eigen[-1]:] ]

                # ignore blank lines
                elif len(word_list) == 0:
                    pass

                # switch back to standard mode & reformat eigenvectors
                else:
                    mode = 'standard'

                    # reshape into a matrix
                    nrow = len(vector_list) // len(label_list)
                    ncol = len(label_list)
                    eigenmatrix = np.empty((nrow,ncol))

                    offset = 0
                    for num in num_eigen:
                        eigenmatrix[:,offset:offset+num] = np.reshape(vector_list[offset*nrow:(offset+num)*nrow],(nrow,num),order='C')
                        offset += num

                    # renormalize the eigenvectors (MOPAC uses a variety of normalizations)
                    for col in eigenmatrix.T:
                        col /= np.linalg.norm(col)

                    # output eigenvalue (if known) and eigenvectors
                    if len(value_list) == len(label_list):
                        parse_list.append((value_list,eigenmatrix,label_list[0] == 1,label_list[-1] == nrow))
                    else:
                        parse_list.append((label_list,eigenmatrix,label_list[0] == 1,label_list[-1] == nrow))
                    parse_line.append(eigen_line_num)

            # standard parsing
            if mode == 'standard':
                for word in word_list:
                    if is_float(word):
                        if 'FINAL HEAT OF FORMATION =' in line and word is word_list[5]:
                            parse_list.append(('HOF',to_float(word)))
                        else:
                            parse_list.append(to_float(word))
                    else:
                        parse_list.append(word)
                    parse_line.append(line_num+1)

    return parse_line, parse_list

def compare_mopac_out_file(out_line, out_list, ref_line, ref_list, heat_error_threshold):
    '''Compares the output to the given reference'''

    if len(ref_list) != len(out_list):
        dump = open("ref.dump", "w")
        for item in ref_list:
            dump.write(f"{item}\n")
        dump.close()
        dump = open("out.dump", "w")
        for item in out_list:
            dump.write(f"{item}\n")
        dump.close()
        assert len(ref_list) == len(out_list), f'ERROR: output file size mismatch, {len(ref_list)} vs. {len(out_list)}'
        #print(f'WARNING: output file size mismatch, {len(ref_list)} vs. {len(out_list)}')

    for (out_line0, out, ref_line0, ref) in zip(out_line, out_list, ref_line, ref_list):
    #    print(ref, "vs.", out)
        # check that types match
        assert type(ref) == type(out), f'ERROR: type mismatch between {ref} on reference line {ref_line0} in {argv[1]} and {out} on output line {out_line0} in {argv[2]}'

        # compare strings
        if type(ref) is str:
            assert ref == out, f'ERROR: string mismatch between {ref} on reference line {ref_line0} in {argv[1]} and {out} on output line {out_line0} in {argv[2]}'

        # compare floats
        elif type(ref) is float:
    #        assert abs(ref - out) < NUMERIC_THRESHOLD, f'ERROR: numerical mismatch between {ref} and {out} on output line {line}'
            if abs(ref - out) > NUMERIC_THRESHOLD:
                print(f'WARNING: numerical mismatch between {ref} on reference line {ref_line0} in {argv[1]} and {out} on output line {out_line0} in {argv[2]}')

        # compare heats of formation
        elif len(ref) == 2:
            assert abs(ref[1] - out[1]) < heat_error_threshold, f'ERROR: numerical heat mismatch between {ref[1]} on reference line {ref_line0} and {out[1]} on output line {out_line0}'
            if abs(ref[1] - out[1]) > HEAT_THRESHOLD:
                print(f'WARNING: numerical heat mismatch between {ref[1]} on reference line {ref_line0} in {argv[1]} and {out[1]} on output line {out_line0} in {argv[2]}')
 
        # compare eigenvalues & eigenvectors
        elif len(ref) == 4:
            ref_val, ref_vec, ref_begin, ref_end = ref
            out_val, out_vec, ref_begin, ref_end = out

            for refv, outv in zip(ref_val,out_val):
    #            assert abs(refv - outv) < NUMERIC_THRESHOLD, f'ERROR: numerical mismatch between {refv} and {outv} on output line {line}'
                if abs(refv - outv) > NUMERIC_THRESHOLD:
                    print(f'WARNING: eigenvalue mismatch between {refv} on reference line {ref_line0} in {argv[1]} and {outv} on output line {out_line0} in {argv[2]}')

                # build list of edges denoting degenerate subspaces
                if ref_begin:
                    edge_list = [0]
                else:
                    edge_list = []
                edge_list += [ i+1 for i in range(len(ref_val)-1) if np.abs(ref_val[i] - ref_val[i+1]) > DEGENERACY_THRESHOLD ]
                if ref_end:
                    edge_list += [len(ref_val)]

                # test the distance between each pair of degenerate subspaces
                for i in range(len(edge_list)-1):
                    overlap = ref_vec[:,edge_list[i]:edge_list[i+1]].T @ out_vec[:,edge_list[i]:edge_list[i+1]]
    #                print("overlap = ",overlap)
                    sval = np.linalg.svd(overlap, compute_uv=False)
                    assert (sval[0] < 1.0 + EIGVEC_THRESHOLD) and (sval[-1] > 1.0 - EIGVEC_THRESHOLD), \
                        f'ERROR: degenerate subspace mismatch between reference line {ref_line0} and output line {out_line0}, overlap range in [{min(sval)},{max(sval)}]'

# stub main for command-line comparisons
if __name__ == "__main__":

    # parse the 2 output files that we are comparing
    ref_line, ref_list = parse_mopac_out_file(argv[1])
    out_line, out_list = parse_mopac_out_file(argv[2])

    # Run the comparison
    compare_mopac_out_file(out_line, out_list, ref_line, ref_list)