1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
|
#!/bin/sh
# PCP QA Test No. 1661
# Test pmproxy libpcp_web discovery file descriptor leaks
#
# Copyright (c) 2019 Red Hat. All Rights Reserved.
#
seq=$(basename $0)
echo "QA output created by $seq"

# Load the shared QA environment, output filters and pre-flight checks.
. ./common.product
. ./common.filter
. ./common.check

# Pre-conditions: the test inspects /proc/<pid>/fd, so it is restricted
# to Linux, and it needs a pmproxy binary on the PATH.
# NOTE(review): _notrun and _check_key_server come from the sourced
# common files; presumably they abandon the test when a requirement is
# not met - confirm there.
if ! [ $PCP_PLATFORM = linux ]
then
    _notrun "Test only runs on Linux"
fi
if ! which pmproxy >/dev/null 2>&1
then
    _notrun "No pmproxy binary installed"
fi
_check_key_server
# Exit handler: remove the scratch archive directory, append the service
# logs to $seq_full, put the pmlogger configuration back, restart pmcd
# and pmlogger, return pmproxy to the state recorded in
# $pmproxy_was_running, and finally delete the $tmp.* scratch files.
#
# Reads: sudo dir seq seq_full tmp PCP_LOG_DIR PCP_ETC_DIR
#        pmproxy_was_running
_cleanup()
{
    echo
    echo "=== cleaning up"
    $sudo rm -rf "$dir"
    echo
    echo "=== see $seq.full for logs"

    # keep a copy of every relevant log for post-mortem analysis
    {
        echo "+++ pmproxy.log +++"
        cat $PCP_LOG_DIR/pmproxy/pmproxy.log
        echo "+++ pmproxy.log.prev +++"
        cat $PCP_LOG_DIR/pmproxy/pmproxy.log.prev
        echo "+++ pmlogger.log +++"
        cat $tmp.pmlogger.log
    } >>$seq_full

    _restore_config $PCP_ETC_DIR/pcp/pmlogger

    _service pmcd restart 2>&1 | _filter_pcp_restart
    _wait_for_pmcd
    _service pmlogger restart 2>&1 | _filter_pcp_restart
    _wait_for_pmlogger

    # $pmproxy_was_running holds the command name "true" or "false"
    if ! $pmproxy_was_running
    then
        echo "Stopping pmproxy ..." >>$seq_full
        _service pmproxy stop >>$seq_full 2>&1
    else
        echo "Restart pmproxy ..." >>$seq_full
        _service pmproxy restart >>$seq_full 2>&1
        _wait_for_pmproxy
    fi

    $sudo rm -rf $tmp.*
}
# Filter for pmproxy service output (stdin to stdout): drops the
# "disabled time series" notice, then passes what is left through
# _filter_pmproxy_stop followed by _filter_pmproxy_start.
_filter_pmproxy()
{
    sed -e '/pmproxy: disabled time series, requires libuv support (missing)/d' |
        _filter_pmproxy_stop |
        _filter_pmproxy_start
}
# Print a progress banner: a blank line on stdout, then the arguments
# joined by single spaces on stdout and appended to $seq_full.
#
# The arguments are expanded as "$@" so that the banner text is emitted
# verbatim; unquoted, words such as "*" would be glob-expanded against
# the current directory and embedded whitespace would be collapsed.
#
# Arguments: the words of the banner
# Reads:     seq_full
_full_log()
{
    echo
    echo "$@" | tee -a $seq_full
}
# Print the volume number of the primary pmlogger archive that pmproxy
# currently has open, or nothing if no matching descriptor is found.
#
# The archive base name is the value of the "primary" instance of
# pmcd.pmlogger.archive; the volume is the last dot-separated field of
# the first matching entry in /proc/$pmproxy_pid/fd that ends in a digit.
# The path and the full fd listing are also appended to $seq_full.
#
# Reads: pmproxy_pid LOCALHOST sudo seq_full PCP_AWK_PROG
# Sets:  path
#
_log_volume()
{
    path=$(pminfo -f pmcd.pmlogger.archive \
        | sed -n -e '/"primary"]/{' -e 's/"$//' -e 's/.*"//' -e 'p' -e '}')
    {
        echo "primary pmlogger archive path: $path"
        echo "contents of /proc/$pmproxy_pid/fd/ ..."
    } >>$seq_full
    $sudo ls -l /proc/$pmproxy_pid/fd/ \
        | tee -a $seq_full \
        | grep "$path" \
        | $PCP_AWK_PROG -F. '/pmlogger.'$LOCALHOST'.*.[0-9]$/ {print $NF; exit}'
}
# Pessimistic default: the exit trap below reports whatever $status
# holds, so the test fails unless it later sets status=0.
status=1
dir="$PCP_ARCHIVE_DIR/qa$seq"
trap '_cleanup; exit $status' 0 1 2 3 15

src/time_stamp $tmp.stamp begin >>$seq_full

PMLOGGER_CHECK_SKIP_LOGCONF=yes
export PMLOGGER_CHECK_SKIP_LOGCONF

# Purge stale archive files (older than 30 days) that may have piled up
# for hosts other than this one during earlier QA runs; anything under
# a directory named after the local host is left alone.
#
find $PCP_ARCHIVE_DIR -type f -mtime +30 \
    | sed -e '/\/'"$(hostname)"'\//d' \
    | while read f
      do
          $sudo rm -f "$f"
      done
src/time_stamp $tmp.stamp 'cleanup old archives' >>$seq_full

# Remember whether pmproxy was up (pid file present) so that _cleanup
# can put it back in the same state.
if [ -f $PCP_RUN_DIR/pmproxy.pid ]
then
    pmproxy_was_running=true
else
    pmproxy_was_running=false
fi
echo "pmproxy_was_running=$pmproxy_was_running" >>$seq_full
# real QA test starts here
LOCALHOST=$(hostname)

# Only the primary logger should be running: save the pmlogger
# configuration, reset the control files, and stop the service.
_save_config $PCP_ETC_DIR/pcp/pmlogger
_restore_pmlogger_control
if ! _service pmlogger stop 2>&1
then
    _exit 1
fi | _filter_pcp_stop

_full_log "=== restarting pmproxy service to ensure sane starting condition"
if ! _service pmproxy restart 2>&1
then
    _exit 1
fi | _filter_pcp_restart
_wait_for_pmproxy || _exit 1

# The pid is used for every later /proc/<pid>/fd inspection.
pmproxy_pid=$(_get_pids_by_name -a pmproxy)
if [ -z "$pmproxy_pid" ]
then
    echo "=== pmproxy not running"
    exit
fi

# Record when, and as which process, pmproxy came up.
{
    date
    $PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p]mproxy( |$)'
    echo "pmproxy_pid=$pmproxy_pid"
} >>$seq_full

# uncomment these next 2 lines to attach gdb to the new pmproxy
# after 'c' to gdb, enter RETURN for shell running this script
#
#debug# echo "sudo gdb -p $pmproxy_pid $PCP_BINADM_DIR/pmproxy"
#debug# if [ -e /dev/tty ]; then read x </dev/tty; fi
src/time_stamp $tmp.stamp 'restart pmproxy' >>$seq_full
# Step: record a short-lived archive in a fresh directory under
# $PCP_ARCHIVE_DIR so that pmproxy's archive discovery has new files
# to open.
_full_log === creating archive in new archive directory
# Create $dir (mode 775, owned by $PCP_USER:$PCP_GROUP) with the
# mkdir_and_chown helper from rc-proc.sh, run in a root shell.
$sudo sh -c ". $PCP_SHARE_DIR/lib/rc-proc.sh; mkdir_and_chown $dir 775 $PCP_USER:$PCP_GROUP"
# make sure no archive files left here from a previous run of this test
$sudo rm -f $dir/testarchive.*
# Run a one-off pmlogger that writes $dir/testarchive and logs to
# $tmp.pmlogger.log (per pmlogger(1): -s 10 samples, -t 0.5sec interval).
# NOTE(review): config.default is a relative path, so this presumably
# relies on being run from the QA directory - confirm.
$sudo sh -c "pmlogger -s 10 -t 0.5sec -c config.default -l$tmp.pmlogger.log $dir/testarchive"
src/time_stamp $tmp.stamp 'pmlogger done' >>$seq_full
# Step: while the freshly written archive is still in place, pmproxy must
# not hold any descriptor on a deleted testarchive file.  Any matching
# line from the fd listing is printed on stdout.
_full_log "=== checking pmproxy file descriptors for deleted files, should be none"
if [ -d /proc/$pmproxy_pid ]
then
    $sudo ls -l /proc/$pmproxy_pid/fd | grep 'testarchive.*(deleted)'
else
    # pmproxy has gone away: capture the process list and every pmproxy
    # log in $seq_full for triage, then give up (status stays at failure).
    echo "Arrgh: pmproxy pid=$pmproxy_pid has vanished" | tee -a $seq_full
    $PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p]mproxy( |$)' >>$seq_full
    for log in $PCP_LOG_DIR/pmproxy/pmproxy.log*
    do
        if [ -f "$log" ]
        then
            # blank separator between logs; this goes to $seq_full like
            # the rest of the diagnostics (it used to land in a stray
            # $seq.log file)
            echo >>$seq_full
            ls -l "$log" >>$seq_full
            cat "$log" >>$seq_full
        else
            # an unmatched glob leaves the literal pattern in $log
            echo "No pmproxy log files?" >>$seq_full
            ls -ld $PCP_LOG_DIR/pmproxy >>$seq_full
            ls -l $PCP_LOG_DIR/pmproxy >>$seq_full
        fi
    done
    exit
fi
src/time_stamp $tmp.stamp 'fd check' >>$seq_full
# Step: xz-compress the data volume and the metadata file; xz replaces
# each original with a .xz file, which is what may leave pmproxy holding
# descriptors on deleted files.
_full_log "=== compressing the new archives, which also deletes the originals"
for suff in 0 meta
do
    if ! [ -f $dir/testarchive.$suff ]
    then
        echo "Error: $dir/testarchive.$suff missing!"
        ls -l $dir
    else
        $sudo xz $dir/testarchive.$suff
    fi
    # the compressed files are created via $sudo, so hand them back
    $sudo chown -R $PCP_USER $dir
done
src/time_stamp $tmp.stamp 'compression' >>$seq_full
_full_log "=== restarting pmlogger"	# primary only
if ! _service pmlogger restart 2>&1
then
    _exit 1
fi | _filter_pcp_restart
_wait_for_pmlogger || _exit 1
src/time_stamp $tmp.stamp 'restart pmcd & pmlogger' >>$seq_full

_full_log "=== wait for pmproxy to process filesystem events"
# Simply finding all the archives can take pmproxy a while, and it
# exposes no externally visible state to wait on, so a host-specific
# delay is the only way found to stop the test consistently failing
# on particular (slower) hosts.
# - kenj
#
case $(hostname) in
    bozo.localdomain | vm11.localdomain)
        pmsleep 4
        ;;
    *)
        pmsleep 2
        ;;
esac
src/time_stamp $tmp.stamp 'wait for pmproxy' >>$seq_full
# Step: after compression removed the original files, pmproxy must have
# let go of them; any descriptor still on a deleted testarchive file is
# printed on stdout.
_full_log "=== checking pmproxy file descriptors for deleted files, should be none"
if ! [ -d /proc/$pmproxy_pid ]
then
    echo "Arrgh: pmproxy pid=$pmproxy_pid has vanished"
    exit
fi
$sudo ls -l /proc/$pmproxy_pid/fd | grep 'testarchive.*(deleted)'
src/time_stamp $tmp.stamp 'check for deleted files' >>$seq_full
# Step: ask pmlogger to start a new archive volume and verify that the
# volume pmproxy has open (as reported by _log_volume) advances by one.
_full_log "=== checking pmproxy correctly switches logvol when pmlogger bumps to a new logvol"

# Diagnostics printed when a volume number cannot be determined.
#   $1 - name of the variable that came back empty
_report_missing_logvol()
{
    echo "Arrgh ... $1 not found from ..."
    $sudo ls -l /proc/$pmproxy_pid/fd/
    echo "pmproxy_pid=$pmproxy_pid"
    $PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p]mproxy( |$)'
}

# pmproxy may not have opened the new primary archive yet, so poll,
# sleeping 2 seconds between attempts, before giving up.
retry=0
while :
do
    curlogvol=$(_log_volume)
    echo "curlogvol=$curlogvol" >>$seq_full
    [ -n "$curlogvol" ] && break
    if [ $retry -gt 5 ]
    then
        _report_missing_logvol curlogvol
        exit
    fi
    retry=$((retry + 1))
    sleep 2
done
src/time_stamp $tmp.stamp 'found old volume #' >>$seq_full

pminfo -f pmproxy.discover.logvol.change_vol >>$seq_full 2>&1
# tell the primary pmlogger to switch volume, and log one metric once
printf '%s\n' 'new volume' 'log mandatory on once { sample.long.one }' \
    | $sudo pmlc -P >>$seq_full 2>&1

# Give pmproxy time to notice; there is no apparent way to make this
# deterministic, hence a host-specific delay.
#
case $(hostname) in
    bozo.localdomain | vm11.localdomain)
        pmsleep 4
        ;;
    *)
        pmsleep 2
        ;;
esac
pminfo -f pmproxy.discover.logvol.change_vol >>$seq_full 2>&1

newlogvol=$(_log_volume)
echo "newlogvol=$newlogvol" >>$seq_full
if [ -z "$newlogvol" ]
then
    _report_missing_logvol newlogvol
    exit
fi

# the new volume must be exactly the successor of the old one
if [ $(expr $curlogvol + 1) -ne $newlogvol ]
then
    echo FAILED curlogvol=$curlogvol newlogvol=$newlogvol
else
    echo passed
fi
src/time_stamp $tmp.stamp 'found new volume #' >>$seq_full
# Step: the same pmproxy process must still be alive after pmlogger has
# been restarted underneath it.
_full_log "=== checking pmproxy survives pmlogger restart"
if ! _service pmlogger restart 2>&1
then
    _exit 1
fi | _filter_pcp_restart
_wait_for_pmlogger || _exit 1

if $sudo [ ! -d /proc/$pmproxy_pid/fd/ ]
then
    echo FAIL
    exit
fi
echo passed
src/time_stamp $tmp.stamp 'check pmproxy survives' >>$seq_full

# success, all done
status=0
exit
|