#!/bin/sh
# PCP QA Test No. 870
# Various checks around the pmlogger control and run files in
# $PCP_TMP_DIR/pmlogger and $PCP_RUN_DIR
#
# With a single --check option, is silent except if there is a problem
# and runs just the integrity check. This could be used from check or
# check.callback
#
# Copyright (c) 2016 Ken McDonell. All Rights Reserved.
#
seq=$(basename $0)
check=false

# A leading --check argument selects the silent, integrity-check-only mode.
#
if [ $# -ge 1 ] && [ "$1" = "--check" ]
then
    check=true

    # A non-empty 2nd argument replaces $seq ... most likely we are being
    # run from check.callback on behalf of some other test.
    #
    if [ $# -ge 2 ] && [ -n "$2" ]
    then
        seq="$2"
    fi

    # Move any existing $seq.full out of the way for now; it is moved
    # back after the common.* files have been sourced.
    #
    if [ -f $seq.full ]
    then
        mv $seq.full $seq.full.save
    fi
fi

# The usual QA banner is suppressed in --check mode.
#
if $check
then
    :
else
    echo "QA output created by $seq"
fi

# get standard environment, filters and checks
. ./common.product
. ./common.filter
. ./common.check

# Reinstate the saved .full file (presumably $seq_full is set up by the
# common.* files sourced above -- it is not assigned in this script).
#
if [ -f $seq_full.save ]
then
    mv $seq_full.save $seq_full
fi

# success is the default!
status=0
trap "cd $here; $sudo rm -rf $tmp $tmp.*; exit \$status" 0 1 2 3 15
# Report whether the pmlogger systemd unit is in the "activating" state.
#
# systemd is only consulted when $PCPQA_SYSTEMD is "yes" and
# $PCP_SYSTEMDUNIT_DIR/pmlogger.service exists.  In that case the state
# reported by systemctl is left in $ActiveState and appended to $seq_full.
#
# Returns 0 if ActiveState is "activating", else 1.
#
_is_activating()
{
    if [ "$PCPQA_SYSTEMD" = yes ] \
        && [ -n "$PCP_SYSTEMDUNIT_DIR" ] \
        && [ -f "$PCP_SYSTEMDUNIT_DIR/pmlogger.service" ]
    then
        # Pick the value out of the "ActiveState=..." line instead of
        # eval'ing systemctl's output: nothing systemctl prints is ever
        # executed, and $ActiveState is always (re)assigned so a stale
        # value cannot survive a query that produced no output.
        #
        ActiveState=$(systemctl show --property=ActiveState pmlogger.service \
            | sed -n -e 's/^ActiveState=//p')
        echo "ActiveState=$ActiveState" >>"$seq_full"
        [ "$ActiveState" = activating ] && return 0
    fi
    return 1
}
# Integrity check of the pmlogger control files in $PCP_TMP_DIR/pmlogger
# and the run files in $PCP_RUN_DIR against the pmlogger processes found
# in the ps listing.
#
# Uses:   $check (true for --check mode), $tmp, $seq_full, $sudo and the
#         $PCP_* settings; calls _get_pids_by_name and _is_activating
# Files:  rewrites $tmp.pmids, $tmp.ps.out and $tmp.ps.pid; appends
#         diagnostics to $seq_full
# Output: Error: and Warning: reports on stdout; the "... check is OK"
#         lines are only printed when not in --check mode
# In --check mode: each Error: (and "no pmloggers running" unless the
#         service is activating) also touches $tmp.err, and control/run
#         files with no matching process are removed
#
_check()
{
# start by iterating over all running pmlogger processes
#
if $check
then
echo "--- running 870 --check @ `date` ---" >>$seq_full
fi
# snapshot the pmlogger pids and the full ps listing, logging both
# to $seq_full
#
_get_pids_by_name pmlogger >$tmp.pmids
echo "_get_pids_by_name pmlogger ..." >>$seq_full
cat $tmp.pmids >>$seq_full
$PCP_PS_PROG $PCP_PS_ALL_FLAGS >$tmp.ps.out
echo "$PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep /[p]mlogger ..." >>$seq_full
grep /[p]mlogger <$tmp.ps.out >>$seq_full
if [ -s $tmp.pmids ]
then
# at least one is running
for pid in `cat $tmp.pmids`
do
# $tmp.ps.pid gets the ps line(s) whose 2nd field equals this pid
# (assumes the pid is column 2 of the $PCP_PS_PROG output)
#
$PCP_AWK_PROG <$tmp.ps.out >$tmp.ps.pid '
$2 == '"$pid"' { print }'
# control file(s) ...
#
if [ -f $PCP_TMP_DIR/pmlogger/$pid ]
then
# control file exists
if grep 'pmlogger .*-P ' <$tmp.ps.pid >/dev/null
then
# primary logger
if [ -L $PCP_TMP_DIR/pmlogger/primary ]
then
# last path component of the ls -l line, i.e. the basename of
# the symlink target
#
check_pid=`ls -l $PCP_TMP_DIR/pmlogger/primary | sed -e 's/.*\///'`
if [ "$check_pid" = "$pid" ]
then
# all is well
if $check
then
:
else
echo "control file check is OK"
fi
else
echo "Error: $PCP_TMP_DIR/pmlogger/primary linked to pid $check_pid instead of $pid"
# ps heading, then the process the link points at, then this one
head -1 $tmp.ps.out
$PCP_AWK_PROG <$tmp.ps.out '
$2 == '"$check_pid"' { print }'
cat $tmp.ps.pid
ls -l $PCP_TMP_DIR/pmlogger/*
$check && touch $tmp.err
fi
else
echo "Error: $PCP_TMP_DIR/pmlogger/primary missing for ..."
head -1 $tmp.ps.out
cat $tmp.ps.pid
ls -l $PCP_TMP_DIR/pmlogger/*
$check && touch $tmp.err
fi
else
# nothing more to check
:
fi
else
echo "Error: $PCP_TMP_DIR/pmlogger/$pid missing for ..."
head -1 $tmp.ps.out
cat $tmp.ps.pid
ls -l $PCP_TMP_DIR/pmlogger/*
$check && touch $tmp.err
fi
# run file(s) ...
#
if [ -S $PCP_RUN_DIR/pmlogger.$pid.socket ]
then
# run file exists
if grep 'pmlogger .*-P ' <$tmp.ps.pid >/dev/null
then
# primary logger
if [ -L $PCP_RUN_DIR/pmlogger.primary.socket ]
then
# basename of the symlink target with the "pmlogger." prefix and
# ".socket" suffix stripped
#
check_pid=`ls -l $PCP_RUN_DIR/pmlogger.primary.socket | sed -e 's/.*\///' -e 's/pmlogger\.//' -e 's/\.socket//'`
if [ "$check_pid" = "$pid" ]
then
# all is well
if $check
then
:
else
echo "run file check is OK"
fi
else
echo "Error: $PCP_RUN_DIR/pmlogger.primary.socket linked to pid $check_pid instead of $pid"
# ps heading, then the process the link points at, then this one
head -1 $tmp.ps.out
$PCP_AWK_PROG <$tmp.ps.out '
$2 == '"$check_pid"' { print }'
cat $tmp.ps.pid
ls -l $PCP_RUN_DIR/pmlogger.*
$check && touch $tmp.err
fi
else
echo "Error: $PCP_RUN_DIR/pmlogger.primary.socket missing for ..."
head -1 $tmp.ps.out
cat $tmp.ps.pid
ls -l $PCP_RUN_DIR/pmlogger.*
$check && touch $tmp.err
fi
else
# nothing more to check
:
fi
else
echo "Error: $PCP_RUN_DIR/pmlogger.$pid.socket missing for ..."
head -1 $tmp.ps.out
cat $tmp.ps.pid
ls -l $PCP_RUN_DIR/pmlogger.*
$check && touch $tmp.err
fi
done
else
# no pmlogger pids were found
if $check
then
if _is_activating
then
# not running but activating, so avoid false alarm
#
:
else
echo "Warning: no pmloggers running"
touch $tmp.err
fi
else
echo "Warning: no pmloggers running"
fi
fi
# Check for dead control files with no pmlogger attached
#
if [ -d $PCP_TMP_DIR/pmlogger ]
then
for file in `cd $PCP_TMP_DIR/pmlogger; ls`
do
# "primary" is mapped to the basename of its link target, any other
# file name is used as the pid directly
#
case "$file"
in
primary)
pid=`ls -l $PCP_TMP_DIR/pmlogger/primary | sed -e 's/.*\///'`
;;
*)
pid="$file"
;;
esac
$PCP_AWK_PROG <$tmp.ps.out >$tmp.ps.pid '
$2 == '"$pid"' { print }'
if [ -s $tmp.ps.pid ]
then
# OK
:
else
# Need a new snapshot of ps in case some pmlogger
# has been started since the last snapshot ...
# and check again
#
$PCP_PS_PROG $PCP_PS_ALL_FLAGS >$tmp.ps.out
$PCP_AWK_PROG <$tmp.ps.out >$tmp.ps.pid '
$2 == '"$pid"' { print }'
if [ -s $tmp.ps.pid ]
then
# OK
:
else
# reported in $seq_full only, not on stdout
echo "Warning: no matching pmlogger process for ..." >>$seq_full
ls -l $PCP_TMP_DIR/pmlogger/$file >>$seq_full
if $check
then
# --check mode also cleans up the orphaned file
$sudo rm -f $PCP_TMP_DIR/pmlogger/$file
echo "File removed." >>$seq_full
fi
fi
fi
done
else
echo "Error: directory $PCP_TMP_DIR/pmlogger missing!"
$check && touch $tmp.err
fi
# Check for dead run files with no pmlogger attached
#
if [ -d $PCP_RUN_DIR ]
then
for file in `cd $PCP_RUN_DIR; ls`
do
# work out the pid this run file belongs to; $pid stays empty for
# any file name not matched below
#
pid=''
case "$file"
in
pmlogger.primary.socket)
pid=`ls -l $PCP_RUN_DIR/pmlogger.primary.socket | sed -e 's/.*\///' -e 's/pmlogger\.//' -e 's/\.socket//'`
;;
pmlogger.pid)
pid=`cat $PCP_RUN_DIR/$file`
;;
pmlogger.*)
pid=`echo $file | sed -e 's/pmlogger\.//' -e 's/\.socket//'`
;;
esac
# no pid => nothing to check for this file
[ -z "$pid" ] && continue
$PCP_AWK_PROG <$tmp.ps.out >$tmp.ps.pid '
$2 == '"$pid"' { print }'
if [ -s $tmp.ps.pid ]
then
# OK
:
else
# Need a new snapshot of ps in case some pmlogger
# has been started since the last snapshot ...
# and check again
#
$PCP_PS_PROG $PCP_PS_ALL_FLAGS >$tmp.ps.out
$PCP_AWK_PROG <$tmp.ps.out >$tmp.ps.pid '
$2 == '"$pid"' { print }'
if [ -s $tmp.ps.pid ]
then
# OK
:
else
# reported in $seq_full only, not on stdout
echo "Warning: no matching pmlogger process for ..." >>$seq_full
ls -l $PCP_RUN_DIR/$file >>$seq_full
if $check
then
# --check mode also cleans up the orphaned file
$sudo rm -f $PCP_RUN_DIR/$file
case "$file"
in
*.socket) echo "Socket removed." >>$seq_full
;;
*) echo "File removed." >>$seq_full
;;
esac
fi
fi
fi
done
else
echo "Error: directory $PCP_RUN_DIR missing!"
$check && touch $tmp.err
fi
# Did a QA test leave behind a modified pmlogger control file?
#
marker='PCP QA test'
if grep "$marker" $PCP_PMLOGGERCONTROL_PATH >/dev/null
then
echo "Error: $PCP_PMLOGGERCONTROL_PATH changed for QA, may not be correct"
grep "$marker" $PCP_PMLOGGERCONTROL_PATH
$check && touch $tmp.err
fi
# And finally, at most 1 primary pmlogger
#
# count the lines of the most recent ps snapshot that look like a
# pmlogger run with -P; sed strips the blanks wc may pad the count with
#
num_primary=`grep 'pmlogger .*-P ' <$tmp.ps.out | wc -l | sed -e 's/ //g'`
if [ "$num_primary" -gt 1 ]
then
echo "Error: more than one primary logger running ..."
head -1 $tmp.ps.out
grep 'pmlogger .*-P ' <$tmp.ps.out
$check && touch $tmp.err
fi
}
# Poll the ps listing for process $1, at most 5 times and 1 second apart,
# stopping as soon as it is no longer listed.
# On return $present is "true" if the process was still listed on the
# last poll, else "false".
# (assumes the pid is column 2 of the $PCP_PS_PROG output)
#
_poll_pid_gone()
{
    for i in 1 2 3 4 5
    do
        # "exit" in an awk main rule still runs END, so record the match
        # in a flag and print exactly one word from END.
        #
        present=$($PCP_PS_PROG $PCP_PS_ALL_FLAGS \
            | $PCP_AWK_PROG -v pid="$1" '$2 == pid { found = 1; exit } END { print (found ? "true" : "false") }')
        $present || break
        sleep 1
    done
}

# real QA test starts here
_check

# --check mode: the integrity check is all there is; $tmp.err is the
# signal from _check that something was wrong.
#
if $check
then
    if [ -f $tmp.err ]
    then
        echo "Now: `date`"
        status=1
    fi
    exit
fi

echo
echo "start another primary pmlogger (expect failure) ..." | tee -a $seq_full
_start_up_pmlogger -Dcontext,pmlc -L -c /dev/null -l $tmp.log -P $tmp.arch
# presumably $pid is set by _start_up_pmlogger -- it is not assigned here
echo "pmlogger pid $pid" >>$seq_full

# The second primary pmlogger is expected to exit by itself; give it a
# few seconds to do so.
#
_poll_pid_gone "$pid"

if $present
then
    echo "Arrggh ... pmlogger appears to be running ..."
    # filter out the "grep -E" of this pipeline, which may itself show
    # up in the ps listing
    $PCP_PS_PROG $PCP_PS_ALL_FLAGS \
    | grep -E "PID|$pid" \
    | grep -v 'grep -E'
    echo "kill off the primary pmlogger we just started ..."
    $sudo kill -KILL $pid
    _poll_pid_gone "$pid"
    if $present
    then
        echo "Arrggh ... failed to kill of pmlogger (pid $pid)"
        $PCP_PS_PROG $PCP_PS_ALL_FLAGS \
        | grep -E "PID|$pid" \
        | grep -v 'grep -E'
    fi
fi

# Report the interesting lines of the pmlogger log with the pid and the
# $PCP_TMP_DIR prefix replaced by fixed text; the whole log goes to
# $seq_full.
#
grep -E 'ERROR:|info:' <$tmp.log \
| sed \
    -e 's/pid [0-9][0-9]*/pid SOMEPID/g' \
    -e "s@$PCP_TMP_DIR/@PCP_TMP_DIR/@g"
cat $tmp.log >>$seq_full

# the control and run files must still be consistent afterwards
_check

exit