1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528
|
#!/usr/bin/env bash
# Purpose: Check data file against DIWG (and, eventually, other) recommendations
# Copyright (C) 2023--present Charlie Zender, ... (Contributing DIWG members---Add your names!)
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the 3-Clause BSD License for more details.
# Prerequisites: Bash, NCO
# Script could use other shells, e.g., dash (Debian default) after rewriting function definitions and loops
# Debug with 'bash -x ncchecker --dbg=dbg_lvl' where 0 <= dbg_lvl <= 5
# Insta-install:
# scp ~/diwg/ncchecker dust.ess.uci.edu:bin
# scp dust.ess.uci.edu:bin/ncchecker ${MY_BIN_DIR}
# Set script name, directory, PID, run directory
# Record invocation directory, then resolve the script's real location, name, and PID
drc_pwd=${PWD} # [sng] Directory from which script was invoked
# Security: Explicitly unset IFS before wordsplitting, so Bash uses default IFS=<space><tab><newline>
unset IFS
# Set these before 'module' command which can overwrite ${BASH_SOURCE[0]}
# NB: dash supports $0 syntax, not ${BASH_SOURCE[0]} syntax
# http://stackoverflow.com/questions/59895/can-a-bash-script-tell-what-directory-its-stored-in
spt_src="${BASH_SOURCE[0]}"
[[ -z "${spt_src}" ]] && spt_src="${0}" # Use ${0} when BASH_SOURCE is unavailable (e.g., dash)
while [ -h "${spt_src}" ]; do # Recursively resolve ${spt_src} until file is no longer a symlink
    drc_spt="$( cd -P "$( dirname "${spt_src}" )" && pwd )"
    spt_src="$(readlink "${spt_src}")"
    # If ${spt_src} was relative symlink, resolve it relative to path where symlink file was located
    [[ ${spt_src} != /* ]] && spt_src="${drc_spt}/${spt_src}"
done
# ${*} (not ${@}) because the arguments are deliberately flattened into one display string
cmd_ln="${spt_src} ${*}" # [sng] Command line as invoked, echoed in the run summary
drc_spt="$( cd -P "$( dirname "${spt_src}" )" && pwd )" # [sng] Directory containing the resolved script
# Quote the path so that a script stored under a directory with whitespace still yields one name
spt_nm=$(basename "${spt_src}") # [sng] Script name (unlike $0, ${BASH_SOURCE[0]} works well with 'source <script>')
spt_pid=$$ # [nbr] Script PID (process ID)
# Original Setup (prior to merger with NCO):
# git clone git@github.com:diwg/diwg ~/diwg
# ln -s ~/diwg/diwg_chk ~/bin/diwg_chk
# ln -s ~/diwg/diwg_chk ~/sh/diwg_chk
# Debugging and Benchmarking:
# ncchecker ~/nco/data/in.nc ~/nco/data/in_4.nc
# ncchecker ~/data/bm/elmv2_ne30pg2l15.nc
# ncchecker ~/data/hdf/MODIS_L2N_20140304T1120.nc > ~/diwg.txt 2>&1 &
# ncchecker ~/data/hdf/OMI-Aura_L2-OMAERUV_2013m1004t2338-o49057_v003-2013m1005t053932.nc
# Production usage:
# ncchecker --dbg=1 ~/nco/data/in_4.nc > ~/diwg.txt 2>&1 &
# screen # Start screen
# ncchecker --dbg=1 ~/nco/data/in_4.nc > ~/diwg.txt 2>&1 &
# Ctl-A D # Detach screen
# tail ~/diwg.txt # Monitor progress
# screen -ls # List screens
# screen -r <ID> # Re-attach screen
# Configure paths at High-Performance Computer Centers (HPCCs) based on ${HOSTNAME}
# Guarantee an exported ${HOSTNAME}: when the shell did not provide one,
# ask the first executable 'hostname' binary found in /bin or /usr/bin
if [[ -z "${HOSTNAME}" ]]; then
    if [[ -f /bin/hostname && -x /bin/hostname ]]; then
        HOSTNAME=$(/bin/hostname)
        export HOSTNAME
    elif [[ -f /usr/bin/hostname && -x /usr/bin/hostname ]]; then
        HOSTNAME=$(/usr/bin/hostname)
        export HOSTNAME
    fi # !hostname
fi # !HOSTNAME
# Set NCO version and directory
# Locate ncks on ${PATH}. 'type -P' is a Bash builtin that searches ${PATH} only,
# so the result does not depend on an external 'which' program being installed
nco_exe=$(type -P ncks)
if [ -z "${nco_exe}" ]; then
    echo "${spt_nm}: ERROR Unable to find NCO, \${nco_exe} = ${nco_exe}"
    echo "${spt_nm}: HINT Carefully examine your environment setup (e.g., .bashrc) to avoid inadvertently overriding (with, e.g., conda-initialization) paths intended to be provided by an analysis-package environment (e.g., Anaconda)"
    exit 1
fi # !nco_exe
# StackOverflow method finds NCO directory: follow symlinks until the real executable is reached
while [ -h "${nco_exe}" ]; do
    drc_nco="$( cd -P "$( dirname "${nco_exe}" )" && pwd )"
    nco_exe="$(readlink "${nco_exe}")"
    # Relative link targets are interpreted relative to the directory that held the link
    [[ ${nco_exe} != /* ]] && nco_exe="${drc_nco}/${nco_exe}"
done
drc_nco="$( cd -P "$( dirname "${nco_exe}" )" && pwd )"
# '2>&1 > /dev/null' feeds only what ncks writes to stderr into the pipe (stdout is discarded)
# Probe ncks once and derive both version number and version string from the same line(s)
nco_vrs_ln=$(ncks --version 2>&1 > /dev/null | grep NCO)
nco_vrs=$(printf '%s\n' "${nco_vrs_ln}" | awk '{print $5}')
nco_sng=$(printf '%s\n' "${nco_vrs_ln}" | awk -F '"' '{print $2}')
# 20190218: Die quickly when NCO is found yet cannot run, e.g., due to linker errors
if [ -z "${nco_vrs}" ]; then
    echo "${spt_nm}: ERROR ${nco_exe} dies with error message on next line:"
    # Run ncks directly so its own diagnostics reach the user
    # (wrapping the call in $(...) would try to execute its output as a command)
    ncks --version
    exit 1
fi # !nco_vrs
lbr_vrs=$(ncks --library 2>&1 > /dev/null | awk '{print $6}')
# When running in a terminal window (not in a non-interactive batch queue)...
if [ -n "${TERM}" ]; then
    # Set fonts for legibility
    if [ -x /usr/bin/tput ] && tput setaf 1 &> /dev/null; then
        fnt_bld=$(tput bold) # Bold
        fnt_nrm=$(tput sgr0) # Normal
        fnt_rvr=$(tput smso) # Reverse
        fnt_tlc=$(tput sitm) # Italic
    else
        # ANSI-C quoting stores the real ESC character, so the sequences work with
        # plain 'echo' as well as with 'printf' (a literal "\e" is only decoded by printf)
        fnt_bld=$'\e[1m' # Bold
        fnt_nrm=$'\e[0m' # Normal
        fnt_rvr=$'\e[07m' # Reverse
        fnt_tlc=$'\e[3m' # Italic
    fi # !tput
fi # !TERM
# Defaults for command-line options and some derived variables
# Modify these defaults to save typing later
caseid='elmforc.ERA5.c2018.0.25d' # [sng] Case ID
dbg_lvl=0 # [nbr] Debugging level
drc_in='' # [sng] Input file directory
drc_in_xmp="${DATA}/era5/data_raw/1979" # [sng] Input file directory for examples
drc_out='' # [sng] Output file directory
drc_out_xmp="${DATA}/era5/data_out" # [sng] Output file directory for examples
# fl_nbr must start at zero: it is used as an array index and in integer tests
# before any file has been registered (e.g., when only options are supplied)
fl_nbr=0 # [nbr] Number of input files
inp_aut='No' # [sng] Input file list automatically generated
inp_glb='No' # [sng] Input file list from globbing directory
inp_psn='No' # [sng] Input file list from positional arguments
inp_std='No' # [sng] Input file list from stdin
nco_opt='' # [sng] NCO options (e.g., '-6 -t 1')
tst_flg='Yes' # [flg] Trigger test-selection code
tst_lst='bnd,chr,mss,nan,tm,xtn' # [sng] Tests to conduct (subset of bnd,chr,mss,nan,tm,xtn)
vrs_prn='No' # [sng] Print version information
# NCO filters documented in http://nco.sf.net/nco.html#filter
# ncvarlst FILE: print, sorted, the first word (minus its colon) of every line of
# 'ncks --trd -m FILE' output that contains ': type' (presumably one line per variable)
# FILE is quoted so that a path containing whitespace reaches ncks as a single argument
function ncvarlst { ncks --trd -m "${1}" | grep -E ': type' | cut -f 1 -d ' ' | sed 's/://' | sort ; }
# ncdmnlst FILE: from 'ncks --cdl -m FILE' output keep the text before the first ':' of each
# line, then print the text before '=' for lines that contain '=' (presumably dimension names)
# FILE is quoted so that a path containing whitespace reaches ncks as a single argument
function ncdmnlst { ncks --cdl -m "${1}" | cut -d ':' -f 1 | cut -d '=' -s -f 1 ; }
# fnc_usg_prn: print usage, option summary, and examples to stdout, then exit with status 1
# Reads globals: spt_nm, fnt_* (font escapes), and current option defaults (dbg_lvl, drc_in, ...)
# NB: font variables are kept inside printf formats so that backslash escapes in them are decoded
function fnc_usg_prn { # NB: dash supports fnc_nm (){} syntax, not function fnc_nm{} syntax
    # Print usage
    printf "${fnt_rvr}Basic usage:\n${fnt_nrm}${fnt_bld}${spt_nm} in.nc${fnt_nrm} # Default settings\n"
    printf "${fnt_bld}${spt_nm} -t bnd,nan${fnt_nrm} # Short option syntax with explicit tests\n"
    printf "${fnt_bld}${spt_nm} --tst=bnd,nan${fnt_nrm} # Long options with explicit tests\n"
    echo "Command-line options [long-option synonyms in ${fnt_tlc}italics${fnt_nrm}]:"
    # echo "${fnt_rvr}-3${fnt_nrm} Output file format CLASSIC (netCDF3 classic CDF1) [${fnt_tlc}fl_fmt, file_format=classic${fnt_nrm}]"
    # echo "${fnt_rvr}-4${fnt_nrm} Output file format NETCDF4 (netCDF4 extended HDF5) [${fnt_tlc}fl_fmt, file_format=netcdf4${fnt_nrm}]"
    # echo "${fnt_rvr}-5${fnt_nrm} Output file format 64BIT_DATA (netCDF3/PnetCDF CDF5) [${fnt_tlc}fl_fmt, file_format=64bit_data${fnt_nrm}]"
    # echo "${fnt_rvr}-6${fnt_nrm} Output file format 64BIT_OFFSET (netCDF3 64bit CDF2) [${fnt_tlc}fl_fmt, file_format=64bit_offset${fnt_nrm}]"
    # echo "${fnt_rvr}-7${fnt_nrm} Output file format NETCDF4_CLASSIC (netCDF4 classic HDF5) [${fnt_tlc}fl_fmt, file_format=netcdf4_classic${fnt_nrm}]"
    # echo "${fnt_rvr}-c${fnt_nrm} ${fnt_bld}caseid${fnt_nrm} Case ID string to generate output filenames (default ${fnt_bld}${caseid}${fnt_nrm}) [${fnt_tlc}caseid, case_id, case${fnt_nrm}]"
    echo "${fnt_rvr}-d${fnt_nrm} ${fnt_bld}dbg_lvl${fnt_nrm} Debug level (default ${fnt_bld}${dbg_lvl}${fnt_nrm}) [${fnt_tlc}dbg_lvl, dbg, debug, debug_level${fnt_nrm}]"
    echo "${fnt_rvr}-i${fnt_nrm} ${fnt_bld}drc_in${fnt_nrm} Input directory (default ${fnt_bld}${drc_in}${fnt_nrm}) [${fnt_tlc}drc_in, in_drc, dir_in, in_dir, input${fnt_nrm}]"
    # echo "${fnt_rvr}-j${fnt_nrm} ${fnt_bld}job_nbr${fnt_nrm} Job simultaneity for parallelism (default ${fnt_bld}${job_nbr}${fnt_nrm}) [${fnt_tlc}job_nbr, job_number, jobs${fnt_nrm}]"
    # echo "${fnt_rvr}-o${fnt_nrm} ${fnt_bld}drc_out${fnt_nrm} Output directory (default ${fnt_bld}${drc_out}${fnt_nrm}) [${fnt_tlc}drc_out, out_drc, dir_out, out_dir, output${fnt_nrm}]"
    echo "${fnt_rvr}-p${fnt_nrm} ${fnt_bld}par_typ${fnt_nrm} Parallelism type (default ${fnt_bld}${par_typ}${fnt_nrm}) [${fnt_tlc}par_typ, par_md, parallel_type, parallel_mode, parallel${fnt_nrm}] [${fnt_tlc}serial | background | mpi${fnt_nrm}]"
    # echo " ${fnt_bld}--tpd_out${fnt_nrm} Timesteps-per-day in output (default ${fnt_bld}${tpd_out}${fnt_nrm}) [${fnt_tlc}tpd_out, tpd, timesteps_per_day${fnt_nrm}]"
    # Closing parenthesis after the default value keeps this line consistent with the other options
    echo " ${fnt_bld}--tests${fnt_nrm} Tests to conduct ('none' means none) (default ${fnt_bld}${tst_lst}${fnt_nrm}) [${fnt_tlc}tst_lst, tst, test_list${fnt_nrm}]"
    echo "${fnt_rvr}-v${fnt_nrm} ${fnt_bld}var_lst${fnt_nrm} Variable list (empty means all) (default ${fnt_bld}${var_lst}${fnt_nrm}) [${fnt_tlc}var_lst, variable_list, var, vars, variable, variables${fnt_nrm}]"
    echo " ${fnt_bld}--version${fnt_nrm} Version and configuration information [${fnt_tlc}version, vrs, config, configuration, cnf${fnt_nrm}]"
    echo "${fnt_rvr}-x${fnt_nrm} ${fnt_bld}--xcl_var${fnt_nrm} Exclude rather than extract var_lst [${fnt_tlc}xcl_var, xcl, exclude, exclude_variables${fnt_nrm}]"
    printf "\n"
    printf "${fnt_rvr}Examples:${fnt_nrm}\n"
    printf "${fnt_bld}${spt_nm} in1.nc in2.nc${fnt_nrm} # Run all tests on two files\n"
    # Every bold example is followed by ${fnt_nrm} so that bold does not leak into the comment text
    printf "${fnt_bld}${spt_nm} -v var1,var2 in1.nc${fnt_nrm} # Check only two variables\n"
    printf "${fnt_bld}${spt_nm} *.nc${fnt_nrm} # Glob input files via wildcard\n"
    printf "${fnt_bld}ls *.nc | ${spt_nm}${fnt_nrm} # Input files via stdin\n"
    printf "${fnt_bld}${spt_nm} --dbg=2 *.nc${fnt_nrm} # Debug ${spt_nm}\n"
    printf "${fnt_bld}${spt_nm} --tests=nan,mss *.nc${fnt_nrm} # Select only two tests\n"
    printf "${fnt_bld}${spt_nm} --tests=xtn,tm,nan,mss,chr,bnd *.nc${fnt_nrm} # Change test ordering\n"
    # Escaped quotes are printed, showing users that the Zarr URL must be quoted on the command line
    printf "${fnt_bld}${spt_nm} \"file://${HOME}/in_zarr4#mode=nczarr,file\"${fnt_nrm} # Check Zarr object(s)\n"
    printf "\nComplete documentation at http://nco.sf.net/nco.html#${spt_nm}\n\n"
    exit 1
} # !fnc_usg_prn()
# Check argument number and complain accordingly
# Remember how many command-line words were supplied; with none there is
# nothing to check, so show the usage message (fnc_usg_prn terminates the script)
arg_nbr=$#
#printf "\ndbg: Number of arguments: ${arg_nbr}"
if (( arg_nbr == 0 )); then
    fnc_usg_prn
fi # !arg_nbr
# Parse command-line options:
# http://stackoverflow.com/questions/402377/using-getopts-in-bash-shell-script-to-get-long-and-short-command-line-options (see method by Adam Katz)
# http://tuxtweaks.com/2014/05/bash-getopts
# Parse short options with getopts; long options arrive as short option '-' whose
# argument is "key" or "key=value" (leading ':' in optstring selects silent error mode)
while getopts :34567c:d:f:i:o:p:s:t:v:x-: OPT; do
    case ${OPT} in
        3) fl_fmt='3' ;; # File format
        4) fl_fmt='4' ;; # File format
        5) fl_fmt='5' ;; # File format
        6) fl_fmt='6' ;; # File format
        7) fl_fmt='7' ;; # File format
        c) caseid="${OPTARG}" ;; # CASEID
        d) dbg_lvl="${OPTARG}" ;; # Debugging level
        f) fml_nm_usr="${OPTARG}" ;; # Family name
        i) drc_in="${OPTARG}" ;; # Input directory
        o) drc_out_usr="${OPTARG}" ;; # Output directory
        p) par_typ="${OPTARG}" ;; # Parallelism type
        s) ;; # Accepted for backward compatibility, argument is ignored
        t) tst_lst="${OPTARG}" ;; # Tests to conduct (advertised as '-t bnd,nan' in usage)
        v) var_lst="${OPTARG}" ;; # Variables
        x) xcl_flg='Yes' ;; # Exclude variable list
        -) LONG_OPTARG="${OPTARG#*=}"
           case ${OPTARG} in
               # Hereafter ${OPTARG} is long argument key, and ${LONG_OPTARG}, if any, is long argument value
               # Long options with no argument, no short option counterpart
               # Long options with argument, no short option counterpart
               # Long options with short counterparts, ordered by short option key
               caseid=?* | case_id=?* | case=?* ) caseid="${LONG_OPTARG}" ;; # -c # CASEID
               dbg_lvl=?* | dbg=?* | debug=?* | debug_level=?* ) dbg_lvl="${LONG_OPTARG}" ;; # -d # Debugging level
               drc_in=?* | in_drc=?* | dir_in=?* | in_dir=?* | input=?* ) drc_in="${LONG_OPTARG}" ;; # -i # Input directory
               drc_out=?* | out_drc=?* | dir_out=?* | out_dir=?* | output=?* ) drc_out_usr="${LONG_OPTARG}" ;; # -o # Output directory
               fl_fmt=?* | fmt_out=?* | file_format=?* | format_out=?* ) fl_fmt="${LONG_OPTARG}" ;; # # Output file format
               fml_nm=?* | fml=?* | family_name=?* | family=?* ) fml_nm_usr="${LONG_OPTARG}" ;; # -f # Family name
               hrd_pth | hard_path | npo | nco_path_override ) hrd_pth='Yes' ;; # # Use hard-coded paths on known machines (intentional no-op because already handled prior to getopt())
               # ${OPTARG%%=*} strips "=value" so that the message names only the offending switch
               hrd_pth=?* | hard_path=?* | npo=?* | nco_path_override=?* ) echo "No argument allowed for --${OPTARG%%=*} switch" >&2; exit 1 ;; # # Use hard-coded paths on known machines
               job_nbr=?* | job_number=?* | jobs=?* ) job_usr="${LONG_OPTARG}" ;; # -j # Job simultaneity
               par_typ=?* | par_md=?* | parallel_type=?* | parallel_mode=?* | parallel=?* ) par_typ="${LONG_OPTARG}" ;; # -p # Parallelism type
               tests=?* | tst_lst=?* | tst=?* | test_list=?* ) tst_lst="${LONG_OPTARG}" ;; # -t # Tests to conduct
               tpd_out=?* | tpd=?* | timesteps_per_day=?* ) tpd_usr="${LONG_OPTARG}" ;; # # Timesteps-per-day in output
               var_lst=?* | variable_list=?* | var=?* | vars=?* | variable=?* | variables=?* ) var_lst="${LONG_OPTARG}" ;; # -v # Variables
               version | vrs | config | configuration | cnf ) vrs_prn='Yes' ;; # # Print version information
               version=?* | vrs=?* | config=?* | configuration=?* | cnf=?* ) echo "No argument allowed for --${OPTARG%%=*} switch" >&2; exit 1 ;; # # Print version information
               xcl_var | xcl | exclude | exclude_variables ) xcl_flg='Yes' ;; # # Exclude rather than extract variable list
               xcl_var=?* | xcl=?* | exclude=?* | exclude_variables=?* ) echo "No argument allowed for --${OPTARG%%=*} switch" >&2; exit 1 ;; # # Exclude rather than extract variable list
               '' ) break ;; # "--" terminates argument processing
               * ) printf "\nERROR: Unrecognized option ${fnt_bld}--${OPTARG}${fnt_nrm}\n" >&2; fnc_usg_prn ;;
           esac ;; # !OPTARG
        :) # In silent mode getopts reports a missing option argument as ':' with the option letter in OPTARG
            printf "\nERROR: Option ${fnt_bld}-${OPTARG}${fnt_nrm} requires an argument\n" >&2
            fnc_usg_prn ;;
        \?) # Unrecognized option
            printf "\nERROR: Option ${fnt_bld}-${OPTARG}${fnt_nrm} not recognized\n" >&2
            fnc_usg_prn ;;
    esac # !OPT
done # !getopts
shift $((OPTIND-1)) # Discard parsed options so that only positional arguments remain
# Whatever remains after option parsing is treated as the list of input files
psn_nbr=$# # [nbr] Number of positional arguments
if (( psn_nbr >= 1 )); then
    # 20200430 Input files on command-line mean we need not check standard-input
    std_chk='No'
    inp_psn='Yes'
fi # !psn_nbr
# When version information was requested, print it together with configuration details and exit 0
if [ "${vrs_prn}" = 'Yes' ]; then
    # Values are passed as printf arguments rather than embedded in the format, so a '%' or
    # backslash in a version string, directory name, or hostname is printed verbatim
    printf '%s, runs DIWG recommendation compliance checks, version %s "%s"\n' "${spt_nm}" "${nco_vrs}" "${nco_sng}"
    printf 'Authors: \n'
    printf 'Config: %s script located in directory %s\n' "${spt_nm}" "${drc_spt}"
    printf 'Config: NCO binaries located in directory %s, linked to netCDF library version %s\n' "${drc_nco}" "${lbr_vrs}"
    if [ "${hrd_pth_fnd}" = 'Yes' ]; then
        printf 'Config: Employ NCO machine-dependent hardcoded paths/modules for %s. (If desired, turn-off NCO hardcoded paths with "export NCO_PATH_OVERRIDE=No").\n' "${HOSTNAME}"
    else
        printf 'Config: No hardcoded machine-dependent path/module overrides. (If desired, turn-on NCO hardcoded paths at supported national labs with "export NCO_PATH_OVERRIDE=Yes").\n'
    fi # !hrd_pth_fnd
    exit 0
fi # !vrs_prn
# Determine mode first (this helps determine other defaults)
# A test list of 'none' disables test selection entirely
# (tst_lst is quoted so that a user-supplied list containing whitespace cannot break the test)
if [ "${tst_lst}" = 'none' ]; then
    tst_nbr=0
    tst_flg='No'
fi # !tst_lst
# Input directory defaults to the invocation directory; a user-specified one must already exist
if [ -z "${drc_in}" ]; then
    drc_in="${drc_pwd}"
else # !drc_in
    if [ ! -d "${drc_in}" ]; then
        echo "${spt_nm}: ERROR specified input directory \"${drc_in}\" does not exist"
        exit 1
    fi # !drc_in
    drc_in_usr='Yes' # [flg] Input directory was supplied by user
fi # !drc_in
# 20170807 Custom tests (code borrowed from ncremap seasons (csn) treatment)
# Define the table of known tests, then map the user's comma-separated request list onto it
if [ "${tst_flg}" = 'Yes' ]; then
    # Standard test enumeration
    # Index into test definition table
    tst_idx_srt=0 # [idx] Starting index for test enumeration
    tst_bnd=0
    tst_chr=1
    tst_mss=2
    tst_nan=3
    tst_tm=4
    tst_xtn=5
    tst_nbr_max=6 # [nbr] Maximum number of tests in definitions database
    # Test abbreviations, uppercase
    tst_abb[${tst_bnd}]='BND'
    tst_abb[${tst_chr}]='CHR'
    tst_abb[${tst_mss}]='MSS'
    tst_abb[${tst_nan}]='NAN'
    tst_abb[${tst_tm}]='TM'
    tst_abb[${tst_xtn}]='XTN'
    # Test abbreviations, lowercase
    tst_abb_lc[${tst_bnd}]='bnd'
    tst_abb_lc[${tst_chr}]='chr'
    tst_abb_lc[${tst_mss}]='mss'
    tst_abb_lc[${tst_nan}]='nan'
    tst_abb_lc[${tst_tm}]='tm'
    tst_abb_lc[${tst_xtn}]='xtn'
    # Recommendation (i.e., short exhortation) in ESDS RFC Title
    # NB: These strings are later used inside printf formats, where \" prints a double quote
    tst_rcm[${tst_bnd}]='Use CF Bounds Attributes'
    tst_rcm[${tst_chr}]='Identifier names shall comply with this regular expression: [A-Za-z][A-Za-z0-9_]*'
    tst_rcm[${tst_mss}]='Avoid use of the missing_value attribute in new Earth Science data products'
    tst_rcm[${tst_nan}]='Earth Science data products should avoid using Not-a-Number (NaN) in any field values or as an indicator of missing or invalid data'
    tst_rcm[${tst_tm}]='Use Double Precision When Archiving Time in Seconds Since a Specific Epoch'
    tst_rcm[${tst_xtn}]='Files created with the HDF5, HDF-EOS5, or netCDF APIs should have filename extensions \"h5\", \"he5\", or \"nc\", respectively'
    # ESDS RFC Recommendation Number
    tst_rfc[${tst_bnd}]='2.3'
    tst_rfc[${tst_chr}]='3.1'
    tst_rfc[${tst_mss}]='4.2'
    tst_rfc[${tst_nan}]='3.7'
    tst_rfc[${tst_tm}]='4.11'
    tst_rfc[${tst_xtn}]='3.8'
    # ESDS RFC Recommendation URL
    tst_url[${tst_bnd}]='https://wiki.earthdata.nasa.gov/pages/viewpage.action?pageId=182296291'
    tst_url[${tst_chr}]='https://wiki.earthdata.nasa.gov/display/ESDSWG/Character+Set+for+User-Defined+Group%2C+Variable%2C+and+Attribute+names'
    tst_url[${tst_mss}]='https://wiki.earthdata.nasa.gov/display/ESDSWG/Avoid+Use+of+the+missing_value+Attribute'
    tst_url[${tst_nan}]='https://wiki.earthdata.nasa.gov/display/ESDSWG/Not-a-Number+%28NaN%29+Value'
    tst_url[${tst_tm}]='https://wiki.earthdata.nasa.gov/display/ESDSWG/Use+Double+Precision+When+Archiving+Time+in+Seconds+Since+a+Specific+Epoch'
    tst_url[${tst_xtn}]='https://wiki.earthdata.nasa.gov/pages/viewpage.action?pageId=182297715'
    # Test names "in English" (complete phrase "Test checks that ___")
    tst_ngl[${tst_bnd}]='coordinates have bounds attributes'
    tst_ngl[${tst_chr}]='identifier names comprised of legal characters'
    tst_ngl[${tst_mss}]='variables do not have missing_value attributes'
    tst_ngl[${tst_nan}]='data values do not contain NaN (or NaNf)'
    tst_ngl[${tst_tm}]='time variables are stored in double precision'
    tst_ngl[${tst_xtn}]='filename extension matches recommended list'
    # Test short names (complete phrase "___ check passed/failed")
    tst_sht[${tst_bnd}]='coordinate bounds'
    tst_sht[${tst_chr}]='character-set'
    tst_sht[${tst_mss}]='missing_value'
    tst_sht[${tst_nan}]='NaN'
    tst_sht[${tst_tm}]='precision of time variables'
    tst_sht[${tst_xtn}]='filename extension'
    # Test invocation options to ncks
    tst_opt[${tst_bnd}]='--chk_bnd'
    tst_opt[${tst_chr}]='--chk_chr'
    tst_opt[${tst_mss}]='--chk_mss'
    tst_opt[${tst_nan}]='--chk_nan'
    tst_opt[${tst_tm}]='--chk_tm'
    tst_opt[${tst_xtn}]='--chk_xtn'
    # Which tests are requested?
    # http://stackoverflow.com/questions/27702452/loop-through-a-comma-separated-shell-variable
    tst_nbr=0 # [nbr] Number of tests to conduct
    for tst in ${tst_lst//,/ }; do
        tst_rqs[${tst_nbr}]=${tst}
        # NB: Requested tests are 0-based, defined tests are 0-based
        for ((tst_dfn_idx=${tst_idx_srt};tst_dfn_idx<${tst_nbr_max};tst_dfn_idx++)); do
            # Require the whole word to equal an abbreviation (upper- or lowercase); a substring
            # match would accept unrelated words that merely contain an abbreviation
            if [[ "${tst}" == "${tst_abb[${tst_dfn_idx}]}" || "${tst}" == "${tst_abb_lc[${tst_dfn_idx}]}" ]]; then
                # Map requested to defined (r2d) tests and inverse (d2r)
                # map_r2d[0]=3 means first test that user requested (i.e., in tst_lst) is fourth defined in table
                # map_d2r[3]=0 means fourth defined test is first requested
                map_r2d[${tst_nbr}]=${tst_dfn_idx}
                map_d2r[${tst_dfn_idx}]=${tst_nbr}
                tst_nbr=$((tst_nbr+1))
                break
            fi # !match
        done # !tst_dfn_idx
        # Inner loop ran to completion without a break, hence nothing in the table matched
        if [ "${tst_dfn_idx}" -eq "${tst_nbr_max}" ]; then
            printf "${spt_nm}: ERROR Requested test ${tst} not defined\n"
            exit 1
        fi # !match
    done # !tst_lst
fi # !tst_flg
# Read files from stdin pipe, positional arguments, or directory glob
#printf "dbg: inp_aut = ${inp_aut}\n"
#printf "dbg: inp_glb = ${inp_glb}\n"
#printf "dbg: inp_psn = ${inp_psn}\n"
#printf "dbg: inp_std = ${inp_std}\n"
# Derived variables
# Derive drc_out from the user-specified output directory, warning when it looks like an option
if [ -n "${drc_out_usr}" ]; then
    chr_fst=${drc_out_usr:0:1} # [sng] First character of user-specified directory
    if [ "${chr_fst}" = '-' ]; then
        # This script's output-directory option is lowercase '-o'; uppercase '-O' is the NCO overwrite flag
        printf "%s: WARNING first character of user-specified output directory drc_out is \"%s\", which looks like an option itself. Be sure the %s '-o' option is followed by an argument that specifies the output directory for processed files. The '-o' option requires a pathname argument, and should not be confused with the '-O' flag (which takes no argument) used in the rest of NCO to force overwriting files (%s automatically overwrites existing output files).\n" "${spt_nm}" "${chr_fst}" "${spt_nm}" "${spt_nm}"
    fi # !chr_fst
    # Fancy %/ syntax removes trailing slash (e.g., from $TMPDIR)
    drc_out="${drc_out_usr%/}"
fi # !drc_out_usr
# Doubly-derived variables
# Assemble input file list fl_in[0..fl_nbr-1] from directory glob, positional arguments, and/or stdin
# Start the count from zero when nothing initialized it, so array indexing and later integer tests work
fl_nbr=${fl_nbr:-0}
if [ "${inp_glb}" = 'Yes' ]; then
    # Unmatched patterns remain literal strings, which the -f test below filters out
    for fl in "${drc_in}"/*.nc "${drc_in}"/*.nc3 "${drc_in}"/*.nc4 "${drc_in}"/*.nc5 "${drc_in}"/*.nc6 "${drc_in}"/*.nc7 "${drc_in}"/*.cdf "${drc_in}"/*.hdf "${drc_in}"/*.he5 "${drc_in}"/*.h5 ; do
        if [ -f "${fl}" ]; then
            fl_in[${fl_nbr}]="${fl}"
            fl_nbr=$((fl_nbr+1))
        fi # !file
    done
fi # !inp_glb
if [ "${inp_psn}" = 'Yes' ]; then
    # Read any positional arguments (these occupy the list from index zero)
    for ((psn_idx=1;psn_idx<=psn_nbr;psn_idx++)); do
        fl_in[psn_idx-1]="${!psn_idx}"
    done # !psn_idx
    fl_nbr=${psn_nbr}
fi # !inp_psn
if [ "${inp_std}" = 'Yes' ]; then
    # Input awaits on unit 0, i.e., on stdin
    # IFS= preserves leading/trailing blanks in names; the -n test keeps a final line that lacks a newline
    while IFS= read -r line || [ -n "${line}" ]; do # NeR05 p. 179
        fl_in[${fl_nbr}]="${line}"
        fl_nbr=$((fl_nbr+1))
    done
fi # !inp_std
# Create output directory
# Create the output directory when one was requested and it does not exist yet
if [ -n "${drc_out}" ] && [ ! -d "${drc_out}" ]; then
    chr_fst=${drc_out:0:1}
    if [ "${chr_fst}" = '-' ]; then
        echo "${spt_nm}: ERROR Attempting to mkdir user-specified output directory \"${drc_out}\" will fail because directory name begins with '-' which is an option indicator"
        echo "${spt_nm}: HINT Specify an output directory name that does not begin with '-'"
        exit 1
    fi # !chr_fst
    cmd_mkd="mkdir -p ${drc_out}" # [sng] Human-readable form of command, used only in error message
    # Invoke mkdir directly with a quoted argument (no eval) so that whitespace or shell
    # metacharacters in the directory name are taken literally
    if ! mkdir -p -- "${drc_out}"; then
        printf '%s: ERROR Failed to create output directory. Debug this:\n%s\n' "${spt_nm}" "${cmd_mkd}"
        printf '%s: HINT Creating a directory requires proper write permissions\n' "${spt_nm}"
        exit 1
    fi # !err
fi # !drc_out
# Print initial state
# Print initial state, positional parameters, and raw input file list when dbg_lvl >= 2
# Values are passed as printf arguments so that '%' or backslashes in them are printed verbatim
if [ "${dbg_lvl}" -ge 2 ]; then
    printf 'dbg: dbg_lvl = %s\n' "${dbg_lvl}"
    printf 'dbg: drc_in = %s\n' "${drc_in}"
    printf 'dbg: drc_out = %s\n' "${drc_out}"
    printf 'dbg: hrd_pth = %s\n' "${hrd_pth}"
    printf 'dbg: job_nbr = %s\n' "${job_nbr}"
    printf 'dbg: mpi_flg = %s\n' "${mpi_flg}"
    printf 'dbg: mpi_nbr = %s\n' "${mpi_nbr}"
    printf 'dbg: nco_opt = %s\n' "${nco_opt}"
    printf 'dbg: tst_lst = %s\n' "${tst_lst}"
    printf 'dbg: tst_nbr = %s\n' "${tst_nbr}"
    printf 'dbg: var_lst = %s\n' "${var_lst}"
    printf 'dbg: xcl_flg = %s\n' "${xcl_flg}"
    psn_nbr=$#
    if [ "${psn_nbr}" -ge 1 ]; then
        printf 'dbg: Found %s positional parameters (besides $0):\n' "${psn_nbr}"
        for ((psn_idx=1;psn_idx<=psn_nbr;psn_idx++)); do
            printf 'dbg: psn_arg[%s] = %s\n' "${psn_idx}" "${!psn_idx}"
        done # !psn_idx
    fi # !psn_nbr
    # Treat an unset file count as zero instead of provoking an integer-expression error
    if [ "${fl_nbr:-0}" -ge 1 ]; then
        printf 'dbg: Processing %s raw input files:\n' "${fl_nbr}"
        for ((fl_idx=0;fl_idx<fl_nbr;fl_idx++)); do
            printf 'dbg: fl_in[%s] = %s\n' "${fl_idx}" "${fl_in[${fl_idx}]}"
        done # !fl_idx
    fi # !fl_nbr
fi # !dbg
# Human-readable summary
# Note the start time, announce what will be checked, and stop when there is nothing to check
date_srt=$(date +%s) # [s] Start time in seconds since epoch
if [ "${dbg_lvl}" -ge 0 ]; then
    printf 'ncchecker, a DIWG recommendation compliance checker, invoked with command:\n'
    echo "${cmd_ln}"
    printf 'Started DIWG checker at %s\n' "$(date)"
    printf '# of recommendations that will be checked: %s/%s\n' "${tst_nbr}" "${tst_nbr_max}"
    printf 'Recommendations that will be checked: '
    # One "number (short name) " entry per requested test, in requested order
    for ((tst_idx=0;tst_idx<tst_nbr;tst_idx++)); do
        idx_dfn=${map_r2d[${tst_idx}]}
        printf '%s (%s) ' "${tst_rfc[${idx_dfn}]}" "${tst_sht[${idx_dfn}]}"
    done # !tst_idx
    printf '\n'
fi # !dbg
if [ "${fl_nbr}" -lt 1 ]; then
    echo "${spt_nm}: ERROR Checker did not receive any input files, fl_nbr = ${fl_nbr}"
    exit 1
fi # !fl_nbr
# Main Loop
# Prepend input directory string to filename
# For every input file, run every requested test through ncks and report the result
for ((fl_idx=0;fl_idx<fl_nbr;fl_idx++)); do
    fl_out[${fl_idx}]="${drc_out}/$(basename "${fl_in[${fl_idx}]}")"
    #printf "fl_in[${fl_idx}] = ${fl_in[${fl_idx}]}\n"
    #printf "fl_out[${fl_idx}] = ${fl_out[${fl_idx}]}\n"
    printf '\nChecking file #%s: %s ...\n' "${fl_idx}" "${fl_in[${fl_idx}]}"
    # Shell-quote the filename once, so the command string below can be both eval'd safely
    # and copy-pasted by the user even when the name holds blanks or metacharacters
    printf -v fl_in_qtd '%q' "${fl_in[${fl_idx}]}"
    for ((tst_idx=0;tst_idx<tst_nbr;tst_idx++)); do
        tst_idxp1=$((tst_idx+1)) # [nbr] One-based test counter for display
        # Index of test in definition table
        idx_dfn=${map_r2d[${tst_idx}]}
        # Table strings stay inside the format because they rely on printf decoding \" escapes
        printf "\nTest ${tst_idxp1}: Check that ${tst_ngl[${idx_dfn}]} ...\n"
        printf "Recommendation ${tst_rfc[${idx_dfn}]}: ${tst_rcm[${idx_dfn}]}\n"
        echo "URL: ${tst_url[${idx_dfn}]}"
        [[ ${dbg_lvl} -ge 0 ]] && date_chr=$(date +"%s")
        cmd_tst[${fl_idx}]="ncks --dbg_lvl=${dbg_lvl} ${tst_opt[${idx_dfn}]} ${fl_in_qtd}"
        if [ "${dbg_lvl}" -ge 2 ]; then
            echo "${cmd_tst[${fl_idx}]}"
        fi # !dbg
        # NB: Commands are executed only at debug levels 0 and 1; higher levels just echo them
        if [ "${dbg_lvl}" -le 1 ]; then
            if [ -z "${par_opt}" ]; then
                if eval "${cmd_tst[${fl_idx}]}"; then
                    printf 'Result: SUCCESS %s check passed.\n' "${tst_sht[${idx_dfn}]}"
                else # !err
                    printf 'Result: FAILURE %s check failed. Reproduce this single check with:\n%s\n' "${tst_sht[${idx_dfn}]}" "${cmd_tst[${fl_idx}]}"
                fi # !err
            else # !par_opt
                # Launch the same test command with the parallelism suffix appended
                eval "${cmd_tst[${fl_idx}]} ${par_opt}" # eval always returns 0 on backgrounded processes
                bnd_pid[${fl_idx}]=$!
            fi # !par_opt
        fi # !dbg
        if [ "${dbg_lvl}" -ge 1 ]; then
            date_crr=$(date +"%s")
            date_dff=$((date_crr-date_chr))
            echo "Elapsed time for ${tst_sht[${idx_dfn}]} check = $((date_dff/60))m$((date_dff % 60))s"
        fi # !dbg
    done # !tst_idx
done # !fl_idx
# Report completion time and total elapsed wall-clock time, then finish successfully
date_end=$(date +%s) # [s] End time in seconds since epoch
if [ "${dbg_lvl}" -ge 0 ]; then
    printf '\nCompleted DIWG Compliance Checker at %s\n' "$(date)"
    date_dff=$((date_end-date_srt)) # [s] Elapsed seconds since start of checks
    printf '%s\n' "Elapsed time $((date_dff/60))m$((date_dff % 60))s"
fi # !dbg
exit 0
|