1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
|
#! /usr/bin/python
#codata-check: compare the CODATA values from
# https://physics.nist.gov/cuu/Constants/Table/allascii.txt
#against the values in GNU units.
import argparse, subprocess, sys
from decimal import Decimal
from functools import reduce
import re
VERSION = '1.9' #script version
# Python-3.8 is required because f-strings and "assignment expressions" are used.
# Beware: I've only actually tested against 3.12 though, so there
# may be something else that breaks with intermediate versions...
REQUIRED = (3,8) #python version requirement
def debug_print(*msg):
if args.debug:
print(*msg)
def warn(*msg):
print(*msg, file=sys.stderr)
def die(*msg):
warn(*msg)
sys.exit(1)
def try_open(file):
try:
return open(file, encoding="utf-8")
except Exception as e:
die(f"could not open {file}: {e}")
if sys.version_info < REQUIRED:
die(f"require a Python interpreter version {REQUIRED} or greater")
# handle formatting for describing "star-special" fields
def star_format(tag, left_expr, equality, right_expr):
if right_expr.startswith('*'):
return f"{tag}: [*] {left_expr} {equality} {right_expr[1:].strip()}"
return None
# check if the codata and gnu values match; allow for floating point rounding issues
# if the value is computed as an expression with more than one token
def check_fuzzy_equal(codata_name, codata_value, codata_unit, gnu_expr, gnu_value, fuzz):
is_derived = re.search(r"\W", gnu_expr)
if is_derived and not args.verbose:
# debug_print(f"ignoring derived value: {gnu_expr} ({codata_name})")
return
codata_value = codata_value.replace(" ", "").replace("...", "") #clean-up
gnu_value = gnu_value.strip()
gnu_expr = gnu_expr.strip()
vcodata = Decimal(codata_value).normalize()
vgnu = Decimal(gnu_value).normalize()
display_unit = "" if codata_unit == "1" else " "+codata_unit
if vcodata == vgnu:
if star_text := star_format("good", gnu_expr, "=", codata_name):
debug_print(star_text)
else:
debug_print(f"good: {gnu_expr} = {codata_value}{display_unit} = {codata_name}")
return
if is_derived:
# we are considering a derived value (not a primary value)
# make allowances for rounding errors in the computation(s) by allowing
# vgnu to vary from vcodata by up to "fuzz" decimal-ULPs of vcodata
vgnu = vgnu.quantize(vcodata) #round vgnu to match (decimal) sig-figs of vcodata
(_, digits_c, _) = vcodata.as_tuple()
(_, digits_g, _) = vgnu.as_tuple()
fn = lambda a,b: a*10 + b
if abs(reduce(fn, digits_c, 0) - reduce(fn, digits_g, 0)) <= fuzz:
if star_text := star_format("fuzzy-good", gnu_expr, "=", codata_name):
debug_print(star_text)
else:
debug_print(f"fuzzy-good: {gnu_expr} = {vgnu} ~ {codata_value}{display_unit} = {codata_name}")
return
print("derived value ", end="")
if star_text := star_format("mismatch", gnu_expr, "!=", codata_name):
print(star_text)
else:
print(f"mismatch: {gnu_expr} = {vgnu} != {codata_value}{display_unit} = {codata_name}")
def skip_header(f):
while line := f.readline():
if line.startswith("-----"):
break
# parse a line into a sequence of fields, each separated by at least two spaces
def parse_fields(line, nfields):
result = []
while line and len(result) < nfields:
spaces = line.find(" ")
if spaces < 0:
result.append(line)
break
result.append(line[:spaces])
line = line[spaces:].lstrip()
while len(result) < nfields:
result.append(None)
return result
# parse the mappings from the CODATA.map file
def get_mapping(map_file):
mapping = dict()
special = []
with try_open(map_file) as mapfile:
skip_header(mapfile)
for line in mapfile.readlines():
t = line.lstrip()
if t == "" or t.startswith("#"):
continue #ignore blank and comment lines
(gnu_expr, quantity) = parse_fields(line.rstrip(), 2)
mapping[quantity] = gnu_expr
if quantity.startswith("*"):
special.append(quantity)
return mapping, special
# verify that all "primary" values are defined in a definitions file
def validate_targets(unit_map, *definition_files):
unit_keys = dict()
# gather all of the defined unit names
first_word = re.compile(r'^\s*\+*\s*(\w+)\s')
for file in definition_files:
if not file:
continue
with try_open(file) as f:
for line in f:
if match:= re.search(first_word, line):
unit_keys[match.group(1)] = True
# ensure that all "primary" values in the map have a defintion match
problems = 0
for expr in unit_map.values():
if not re.search(r'\A[A-Za-z_]\w*\Z', expr):
continue
if not unit_keys.get(expr):
problems += 1
warn(f"undefined: {expr} is not found in unit definition file(s)")
if problems > 10:
sys.exit(2)
if problems > 0:
sys.exit(1)
# process NIST's allascii.txt CODTA file, comparing against the output from GNU units
def process_codata(codata_file, unit_map, specials, units_definitions, supplement_file, fuzz):
# phase 1: slurp the CODATA file into a data structure, and prepare input to "units"
units_stdin = []
entries = []
for expr1 in specials:
expr2 = unit_map[expr1]
entries.append( (expr1, "1", expr2, "1") )
expr1 = expr1[1:].lstrip() #remove the leading "*" marker
units_stdin.append(expr1) # request "have" expression
units_stdin.append(expr2) # target "want" unit
with try_open(codata_file) as data:
skip_header(data)
for line in data.readlines():
if line.lstrip() == "":
continue #ignore blank lines
(codata_quantity, codata_value, codata_uncertainty, unit) = parse_fields(line.rstrip(), 4)
if not unit:
unit = "1"
if gnu_expr := unit_map.get(codata_quantity, ""):
if gnu_expr == "?":
#marked as "don't know how to map", so ignore this line
if args.verbose:
debug_print(f"not evaluating '{codata_quantity}'")
continue
entries.append( (codata_quantity, codata_value, gnu_expr, unit) )
units_stdin.append(gnu_expr) # request "have" expression
units_stdin.append(unit) # target "want" unit
else:
warn(f"ignoring unhandled CODATA quantity: {codata_quantity}")
# phase 2: invoke "units" to convert values
cmd = ['units', '-1', '-d', 'max', '-f', units_definitions]
if supplement_file:
cmd.extend(['-f', supplement_file])
units_stdin = "\n".join(units_stdin).encode()
r = subprocess.run(cmd, input=units_stdin, capture_output=True, timeout=8)
if r.stderr:
#this error message isn't going to be very helpful; it will lack
#context about what line of input is causing problems...
die(f"Error running {cmd}: {r.stderr.rstrip()}")
#phase 3: process "units" output
try:
lines = iter(r.stdout.decode().split(sep="\n"))
while line := lines.__next__().strip():
if line == "":
break
for (codata_quantity, codata_value, gnu_expr, unit) in entries:
if not (line := lines.__next__().strip()):
die("units stdout ended while still having CODATA to process")
star = line.find("*")
if star < 0:
die(f"got unexpected response from GNU units for {gnu_expr} -> {unit}: {line}")
check_fuzzy_equal(codata_quantity, codata_value, unit, gnu_expr, line[star+1:], fuzz)
except StopIteration:
pass
usage_suggestion = '''
Comparing units(1) and CODATA Constants
=======================================
This codata-check script compares a file containing CODATA constants
against values reported by units(1). Here is a suggested work-flow.
1. It is recommended to work in a fresh directory, installing
the files below into it.
2. Obtain the necessary files:
* Make a copy of the units data file, usually the definitions.units
file that was installed as part of GNU units (or extracted from
a tarball of same).
* Copy (or symlink) all of the files !include'd from the definitions.units
file into the working directory. You can get a quick list of the
required files with:
grep '!include' definitions.units
* A file with the latest CODATA constants. It is possible that
the location may change at some time in the future, but for many
years now it has been available from:
https://physics.nist.gov/cuu/Constants/Table/allascii.txt
* The file mapping units names to corresponding CODATA values,
by default, "CODATA.map".
At the time of writing this help, there is a discrepancy between how
GNU units defines natural_energy and how CODATA defines natural energy.
As a work-around, create a supplementary file which forces the CODATA
definition:
echo '+natural_energy m_e c^2' >natural_energy
3. Make the initial comparison using the local working copy of the
units data file:
python codata-check -u natural_energy -v >/dev/null
(For this phase we don't care about normal output, thus the
redirection to the bit-bucket.)
For the input files to be well-formed, every "GNU expression"
in the map file must be well-defined in the units data file,
and every quantity listed in the NIST CODATA file must have
an entry in the map file. Warnings will be written to stderr
if these constraints are violated (*), and so they will show
up with the above >/dev/null redirection. These problems need
to be cleaned up first. After making appropriate corrections,
repeat this step until no warnings are given.
(*) Technically, only simple one-token expressions are checked
at this stage. In principle it is possible for other expressions
to have problems; any such will be detected (perhaps with a less
helpful error message) in a later step. (The checking at this
stage is purely textual; later stages invoke GNU units to actually
evaluate the full expressions.)
4. Once the three files are in sync with respect to the tracked
quantities and units, check for data value discrepancies:
python codata-check -u natural_energy -v | grep -v ^derived value'
Any problems will be reported as a "mismatch", with enough details
to know what units need fixing in the local working copy, and
what the correct values should be. Make any necessary updates
and repeat the comparison until there is no output.
If the output is too quiet and you want reassurance that something is
actually happening, run codata-check with the '-d' (debug) option, e.g.:
python codata-check -u natural_energy -d
In this case, every "non-derived" unit (in a sense documented in the
CODATA.map file) should have a disposition reported (either "good"
or "mismatch").
5. As an final check, confirm that *all* values in the CODATA
file match what GNU units reports by running codata-check
without any filters:
python codata-check -u natural_energy -v
In addition to the checks performed in step 4, any "derived" values
that differ from the CODATA values will also be reported. If the
earlier run in step 4 was silent, then this run ought to be
silent also; if not, there is a bug somewhere (the definitions.unit
file, the CODATA.map file, the allascii.txt file, or possibly
even the codata-check script). It may take some detective work
to resolve any errors which show up only in verbose mode, but
ideally this will be a rare occurrence.
6a. Ensure that the version identifier and the last-updated and copyright
dates in definitions.units are all updated.
6b. Clean-up any tabs and/or any trailing spaces which might have
been introduced:
expand definitions.units > tmp$$
sed 's/ *$//' tmp$$ > definitions.units
rm tmp$$
'''
help_epilog = '''
Where:
codata-file (default: allascii.txt)
is a local copy of the NIST allascii.txt file
https://physics.nist.gov/cuu/Constants/Table/allascii.txt
map-file (default: CODATA.map)
consists of mappings between GNU unit expressions and CODATA Quantity
names; the two fields are separated by two or more spaces
units-definitions (default: use system-default definitions.units)
use the specified file in place of the system's default definition.units
file (passed as a "-f" argument to GNU units)
units-supplement (default: do not use a supplemental file)
consists of any local updates to the normal GNU units definitions.units
file; this is particularly helpful for testing whether proposed changes
fix things as desired (passed as a "-f" argument to GNU units)
fuzz (default: 2)
is an integer specifying how much fuzziness is allowed when
comparing derived quantities (to account for rounding errors).
It specifies how many "units in the last position" ("ULPs")
are tolerated; for example, a fuzziness of 2 allows 3.1415
to match 3.1417, but not 3.1418. This comparison is done
after rounding the computed GNU unit value to have the same
precision as the provided NIST CODATA value.
verbose (default: not verbose)
show issues with "derived" quantities (in addition to,
not instead of, the printing of issues with "primary" values)
debug (default: not debug)
include debugging diagnostics
For a suggested work-flow, use the "--usage" flag.
'''
parser = argparse.ArgumentParser(
prog = "codata-check",
description = "check GNU units against CODATA values downloaded from NIST",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog = help_epilog
)
parser.add_argument('-c', '--codata-file', default='allascii.txt')
parser.add_argument('-m', '--map-file', default='CODATA.map')
parser.add_argument('-U', '--units-definitions', default='')
parser.add_argument('-u', '--units-supplement', default=None)
parser.add_argument('-f', '--fuzz', type=int, default=2)
parser.add_argument('-v', '--verbose', action='store_true')
parser.add_argument('-d', '--debug', action='store_true')
parser.add_argument('-V', '--version', action='store_true')
parser.add_argument('--usage', action='store_true')
args = parser.parse_args()
if args.version:
print(f"{sys.argv[0]} version {VERSION}")
sys.exit(0)
if args.usage:
print(usage_suggestion)
sys.exit(0)
(unit_map, specials) = get_mapping(args.map_file)
if args.units_definitions:
validate_targets(unit_map, args.units_definitions, args.units_supplement)
elif args.units_supplement:
try_open(args.units_supplement).close() # we just want the error-checking
process_codata(args.codata_file, unit_map, specials, args.units_definitions, args.units_supplement, args.fuzz)
|