
|
#!/bin/sh
# PCP QA Test No. 956
# Exercise pmcd attribute PDU handling after agent failure.
#
# Copyright (c) 2015 Red Hat.
#
# The test's name is the script's own file name; it labels the output below.
seq=$(basename $0)
echo "QA output created by $seq"

# Load the standard QA environment, output filters and checks.
. ./common.product
. ./common.filter
. ./common.check

# Early safety net: remove this test's temporary files on exit.
# $tmp is expanded now, when the trap is installed (presumably set by the
# common files sourced above -- confirm).
trap "rm -f $tmp.*" 0

# The test talks to pmcd over a unix domain socket, so skip it when that
# is not available ($unix_domain_sockets is presumably set by
# _get_libpcp_config -- confirm).
_get_libpcp_config
if ! $unix_domain_sockets
then
    _notrun "No unix domain socket support available"
fi
# Output filter (stdin -> stdout): every run of two or more consecutive
# dots is rewritten as exactly three dots, so progress output of varying
# length compares equal.  Single dots are left alone.
_filter()
{
    sed 's/\.\.\.*/\.\.\./g'
}
# Undo what this test changed: stop pmcd, drop the PMCD_PORT/PMCD_SOCKET
# overrides, restore the saved pmcd options file, restart pmcd and
# pmlogger, run the dynamic PMDA's Remove script if any trace of that
# PMDA is still visible, and finally delete the test's temporary files.
_cleanup()
{
    _service pmcd stop | _filter_pcp_stop

    # forget the private port/socket before restarting the services
    unset PMCD_PORT PMCD_SOCKET
    _restore_config $PCP_PMCDOPTIONS_PATH

    _service pmcd restart 2>&1 | _filter_pcp_restart
    _wait_for_pmcd
    _restore_auto_restart pmcd

    _service pmlogger restart 2>&1 | _filter_pcp_restart
    _wait_for_pmlogger
    _restore_auto_restart pmlogger

    # pmcd still reports a "dynamic" agent => uninstall the PMDA
    if pmprobe -I pmcd.agent.status | grep '"dynamic"' >/dev/null; then
        cd $here/pmdas/dynamic
        $sudo ./Remove >>$seq_full 2>&1
        cd $here
    fi

    # dynamic.* names still in the PMNS => run Remove (again) to clean up
    if pminfo | grep '^dynamic' >/dev/null; then
        cd $here/pmdas/dynamic
        $sudo ./Remove >>$seq_full 2>&1
        cd $here
    fi

    $sudo rm -f $tmp.*
}
status=1	# assume failure until the very end of the test
signal=$PCP_BINADM_DIR/pmsignal

# From here on, run _cleanup on exit and on signals 1, 2, 3 and 15, then
# exit with whatever $status holds at that time (expansion is deferred).
trap '_cleanup; exit $status' 0 1 2 3 15

# Pick the TCP port the private pmcd will use (presumably _get_port
# returns a free port from the given range -- confirm).
port=$(_get_port tcp 6060 6070)
[ -n "$port" ] || {
    echo "Arrggh ... no free TCP port in the range 6060 ... 6070"
    $NETSTAT -a
    exit 1
}
echo "port=$port" >>$seq_full

# Keep a copy of the current pmcd options and prepare a replacement that
# points pmcd at a test-private socket.
_save_config $PCP_PMCDOPTIONS_PATH
cat >$tmp.newoptions <<End-of-File
# New pmcd.options file created by PCP QA test $seq
#
-s $tmp.socket
End-of-File

_stop_auto_restart pmcd
_stop_auto_restart pmlogger

# Stop pmlogger, then pmcd.
# NOTE(review): the _exit inside each pipeline presumably runs in the
# pipeline's subshell, so it is the _wait_*_end checks that actually
# abort the test -- confirm.
{ _service pmlogger stop 2>&1 || _exit 1; } \
| _filter_pcp_stop
_wait_pmlogger_end || _exit 1

{ _service pmcd stop 2>&1 || _exit 1; } \
| _filter_pcp_stop
_wait_pmcd_end || _exit 1
# Put the test's pmcd options in place.
$sudo cp $tmp.newoptions $PCP_PMCDOPTIONS_PATH

# pmcd has to be started from a generated script: sudo may scrub the
# environment, so PMCD_PORT and PMCD_SOCKET are set inside the script
# itself, and for the same reason the _service wrapper cannot be used.
#
{
    echo "PMCD_PORT=$port; export PMCD_PORT"
    echo "PMCD_SOCKET=$tmp.socket; export PMCD_SOCKET"
    echo "$PCP_RC_DIR/pmcd restart"
} >$tmp.start
$sudo sh $tmp.start 2>&1 | _filter_pcp_start

# Same settings for the commands run by this script from here on.
PMCD_PORT=$port
PMCD_SOCKET=$tmp.socket
export PMCD_PORT PMCD_SOCKET
_wait_for_pmcd || _exit 1

# Rebuild the dynamic PMDA from clean, using GNUmakefile.install when
# that file exists and the default makefile otherwise.
cd $here/pmdas/dynamic
if [ -f GNUmakefile.install ]; then
    makefile_opt="-f GNUmakefile.install"
else
    makefile_opt=""
fi
$PCP_MAKE_PROG $makefile_opt clean >>$seq_full 2>&1
$PCP_MAKE_PROG $makefile_opt >>$seq_full 2>&1

# Install it non-interactively; the raw output is appended to $seq_full
# and the filtered output becomes part of the test's output.
$sudo ./Install </dev/null >$tmp.out 2>&1
tee -a $seq_full <$tmp.out | _filter_pmda_install | _filter
cd $here
# real QA test starts here
echo "Initial check of some metric access"
pmprobe -h unix:$tmp.socket -i hinv.ncpu
pmsleep 0.2

echo "Terminate a PMDA needing attributes" | tee -a $seq_full
date >>$seq_full
# record the pmdadynamic process(es) before killing them
$PCP_PS_PROG $PCP_PS_ALL_FLAGS \
| grep -E '[P]PID|/[p]mdadynamic( |$)' >>$seq_full
# first call only goes to the log (-n is presumably a dry run -- confirm
# against pmsignal(1)); the second one delivers the KILL
$signal -s KILL -an pmdadynamic >>$seq_full 2>&1
$sudo $signal -s KILL -a pmdadynamic >>$seq_full 2>&1

# Poll the process table up to 10 times, 0.2 seconds apart (about 2
# seconds), until the dynamic PMDA is gone.  $tmp.tmp is removed on
# success, so a leftover $tmp.tmp after the loop means it never died.
probe=0
while [ "$probe" -lt 10 ]
do
    $PCP_PS_PROG $PCP_PS_ALL_FLAGS >$tmp.tmp
    echo "ps probe #$probe" >>$seq_full
    grep -E '[P]PID|/[p]mdadynamic( |$)' $tmp.tmp >>$seq_full
    # done when the PMDA shows up as a defunct process, or does not
    # show up at all; otherwise it is still running, so keep trying
    if grep -E '(pmdadynamic.*defunct)|( Z .*pmdadynamic)|\(pmdadynamic\)' $tmp.tmp >/dev/null \
	|| ! grep "pmdadynamic" $tmp.tmp >/dev/null
    then
	rm -f $tmp.tmp
	break
    fi
    pmsleep 0.2
    probe=$((probe + 1))
done

if [ -f $tmp.tmp ]; then
    echo "Arrgh ... dynamic PMDA won't die"
    grep -E '[P]PID|[p]mdadynamic' $tmp.tmp
    exit
fi
date >>$seq_full

# and a bit more for pmcd to notice the PMDA has exited
pmsleep 3.75
# First request after the PMDA was killed: the probe's output goes to the
# test output and is also kept in $tmp.tmp for the check below.
echo "Tickle access to the failed PMDA, must see 'Try Again'"
pmprobe -h unix:$tmp.socket -i hinv.ncpu | tee $tmp.tmp

# Second field of the pmprobe output; the check below expects it to be a
# negative number at this point.
nval=$($PCP_AWK_PROG <$tmp.tmp '{print $2}')

# Anything other than a negative integer is unexpected.  The two tests
# are kept separate on purpose: the old single "[ -z ... -o ... -ge 0 ]"
# still evaluated the numeric comparison for an empty $nval, which made
# test(1) fail with "integer expression expected" and silently skipped
# this branch.  "! [ ... -lt 0 ]" also catches non-numeric values, where
# the comparison itself fails.
if [ -z "$nval" ] || ! [ "$nval" -lt 0 ] 2>/dev/null
then
    # this is not expected
    #
    echo "Error: nval=$nval not as expected ... see $seq.full"
    # collect diagnostics for post-mortem analysis
    pmprobe -h unix:$tmp.socket -i hinv.ncpu >>$seq_full
    pminfo -f pmcd.agent.status pmcd.agent.fenced >>$seq_full
    echo "+++ Tickle failed `date` +++" >>$seq_full
    cat $PCP_LOG_DIR/pmcd/pmcd.log >>$seq_full
    cat $PCP_LOG_DIR/pmcd/dynamic.log >>$seq_full
fi

echo "Verify subsequent return to healthy state"
pmprobe -h unix:$tmp.socket -i hinv.ncpu

# success, all done
status=0
exit
|