1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
|
#!/bin/sh
# PCP QA Test No. 1443
# check pmlogger_janitor
#
# Copyright (c) 2023 Ken McDonell. All Rights Reserved.
#
# QA preamble: establish $seq (the name of this test) and print the
# banner line that begins the test's output.
#
case $# in
    0)
        # no arguments: the test's name is this script's own name
        seq=`basename $0`
        echo "QA output created by $seq"
        ;;
    *)
        # arguments given: keep $seq from the caller if there is one,
        # otherwise fall back to this script's own name
        [ -n "$seq" ] || seq=`basename $0`
        echo "QA output created by `basename $0` $*"
        ;;
esac
# pull in the standard QA environment, filters and checks
. ./common.product
. ./common.filter
. ./common.check

# this test has to see *all* of the command arguments in the output
# of ps, so it cannot run where ps truncates them
#
case "$PCP_PLATFORM"
in
    solaris)
        _notrun "ps(1) args field limited to 80 chars on Solaris"
        ;;
esac

# NOTE(review): presumably bails out when no usable job scheduler
# (cron/systemd timers) is found -- confirm against the definition of
# _check_job_scheduler
_check_job_scheduler
# Exit-time cleanup, run from the trap set up below.
#
# Returns to $here, restarts pmlogger and restores the job scheduler
# (once only, guarded by $needclean), then removes the temporary files,
# the janitor log, and the archive directories and control files of the
# "orphan" pmloggers this test creates.
#
_cleanup()
{
    cd $here

    if $needclean
    then
        _service pmlogger start 2>&1 | _filter_pcp_start
        _restore_job_scheduler $tmp.cron $tmp.systemd $sudo
        _wait_for_pmlogger
        # do not repeat the restore if we get here a second time
        needclean=false
    fi

    $sudo rm -rf $tmp $tmp.* /var/tmp/janitor-$seq.log

    # same name is used for the archive directory and the control file
    for orphan in orphan-$hostname orphan-localhost
    do
        $sudo rm -rf "$PCP_ARCHIVE_DIR/$orphan"
        $sudo rm -f "$PCP_ETC_DIR/pcp/pmlogger/control.d/$orphan"
    done
}
status=0        # success is the default!

# on exit, or on signals 1, 2, 3 and 15, run _cleanup and then exit with
# whatever $status holds at that time
trap '_cleanup; exit $status' 0 1 2 3 15

# _cleanup restarts pmlogger and restores the job scheduler while this
# is true
needclean=true

# short host name: everything before the first "."
hostname=`hostname`
hostname=${hostname%%.*}
# Make pmlogger_janitor's log deterministic.
#
# Reads the log on stdin and writes the filtered version on stdout; the
# unfiltered input is also appended to $seq_full.
#
# The sed stage deletes ps(1) headings, ps lines for this test itself,
# and assorted chatter lines, then replaces timestamps, the archive and
# tmp directories, the local host name, the two orphan pmlogger PIDs
# ($pid_1, $pid_2) and this shell's own PID with fixed tokens.
# The awk stage drops any "PID ... CMD" heading line together with the
# line that follows it.
#
_filter()
{
    tee -a $seq_full \
    | sed \
        -e '/ PPID /d' \
        -e '/ TIME /d' \
        -e "/sh \\.\/$seq\$/d" \
        -e "/ $seq\$/d" \
        -e "/^[1-9][0-9]* .* $seq\$/d" \
        -e '/check .*-g pmlogger/d' \
        -e '/^pcp /d' \
        -e '/^Start \[/d' \
        -e '/^End \[/d' \
        -e '/^Pass /d' \
        -e '/^Info: processing archives from remote pmlogger /d' \
        -e 's/2[0-9][0-9][0-9][01][0-9][0-3][0-9]\.[0-5][0-9]\.[0-5][0-9]/TIMESTAMP/g' \
        -e 's/TIMESTAMP-[0-9][0-9]/TIMESTAMP/g' \
        -e "s@$PCP_ARCHIVE_DIR/@PCP_ARCHIVE_DIR/@g" \
        -e "s@$PCP_TMP_DIR/@PCP_TMP_DIR/@g" \
        -e "s/$hostname/HOSTNAME/g" \
        -e "s/PID $pid_1\$/PID PID_1/" \
        -e "s/PID $pid_2\$/PID PID_2/" \
        -e "/PID $$/s/$$/MYPID/g" \
        -e '/^Warning: ignoring packaging backup control file/d' \
    | $PCP_AWK_PROG '
{
    # a "PID ... CMD" heading starts a 2-line region to be discarded
    if ($1 == "PID" && $NF == "CMD") pending = 2
    if (pending > 0) {
        pending--
        next
    }
    print
}'
}
# real QA test starts here

# NOTE(review): presumably takes the job scheduler out of play so that
# scheduled pmlogger jobs cannot interfere with the test -- confirm
# against _remove_job_scheduler; _cleanup undoes this with
# _restore_job_scheduler using the same $tmp.cron and $tmp.systemd
_remove_job_scheduler $tmp.cron $tmp.systemd $sudo

# create orphaned pmloggers for pmlogger_janitor testing
#
# policy for a dedicated "orphan" class; the control file it generates
# sets PMLOGGER_CHECK_SKIP_JANITOR=yes and points pmlogger at
# $tmp.config
#
cat <<End-of-File >$tmp.policy
# policy file for the orphan class, qa/1443
[class]
orphan
[ident]
orphan-%h
[control]
\$version=1.1
\$PMLOGGER_CHECK_SKIP_JANITOR=yes
\$class=orphan
%h n n PCP_ARCHIVE_DIR/%i -c $tmp.config
[create]
# matches all hosts
hostname(.*)
End-of-File

# minimal pmlogger configuration used by both orphans
cat <<End-of-File >$tmp.config
log mandatory on default { sampledso.bin }
End-of-File

# start 2 pmloggers
#
for host in localhost $hostname
do
    # clear out any control file left over from an earlier run; use the
    # same $PCP_ETC_DIR-based path as _cleanup rather than a hard-coded
    # /etc so both agree when PCP is not configured under /etc
    $sudo rm -f "$PCP_ETC_DIR/pcp/pmlogger/control.d/orphan-$host"
    $sudo pmlogctl create -p $tmp.policy $host
    $sudo pmlogctl start -p $tmp.policy -c orphan $host
done
# Print the PID of the running "orphan" class pmlogger for host $1, or
# nothing if pmlogctl status reports no such pmlogger.
#
# looking for a line like ...
# localhost 20230318.07.31 orphan 729669 running
# to get the pid
#
_orphan_pid()
{
    # the state test must be a comparison (==); an assignment (=) is
    # always true and would accept any 5-field line, whatever its state
    pmlogctl status -c orphan "$1" 2>&1 \
    | awk 'NF == 5 && $5 == "running" { print $4 }'
}

pid_1=`_orphan_pid localhost`
pmlogctl status -c orphan localhost >>$seq_full
echo "pid_1=$pid_1" >>$seq_full
if [ -z "$pid_1" ]
then
    echo "Arrgh, no PID from ..."
    pmlogctl status -c orphan localhost
else
    # remove the status file for this one
    #
    $sudo rm $PCP_TMP_DIR/pmlogger/$pid_1
fi

# the second orphan keeps its status file
pid_2=`_orphan_pid $hostname`
pmlogctl status -c orphan $hostname >>$seq_full
echo "pid_2=$pid_2" >>$seq_full
if [ -z "$pid_2" ]
then
    echo "Arrgh, no PID from ..."
    pmlogctl status -c orphan $hostname
fi
# hide 'em from the control files
#
# with their control files gone, the two pmloggers started above are no
# longer mentioned in control.d; use the same $PCP_ETC_DIR-based path as
# _cleanup rather than a hard-coded /etc so both agree when PCP is not
# configured under /etc
#
for host in localhost $hostname
do
    $sudo rm -f "$PCP_ETC_DIR/pcp/pmlogger/control.d/orphan-$host"
done

# record the pmlogger processes (and the ps heading) for triage
$PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p]mlogger( |$)' >>$seq_full

# customer issue from sagar.sagar AT oracle.com
# stale file, $PCP_TMP_DIR/pmlogger/<pid> from an old pmlogger that
# is long-gone, but now <pid> is an active process that is NOT a
# pmlogger ... the janitor was trying to kill <pid>
#
cat <<End-of-File >$tmp.mapfile
12345
no.such.host
/no/such/path
pmlogger_check
End-of-File

# use the QA test's PID, to make sure we don't kill a non-pmlogger
# process
#
echo "I am PID $$ [not a pmlogger]" >>$seq_full
$sudo -u $PCP_USER cp $tmp.mapfile $PCP_TMP_DIR/pmlogger/$$
ls -l $PCP_TMP_DIR/pmlogger >>$seq_full
# now run the janitor ...
#
# it runs as $PCP_USER, so the current directory has to be somewhere
# that user can cd to
#
janitor_log=/var/tmp/janitor-$seq.log
cd /var/tmp
$sudo -u $PCP_USER $PCP_BINADM_DIR/pmlogger_janitor -V -l $janitor_log
cd $here

# keep the raw log for triage, and filter a copy for reporting
cat $janitor_log >>$seq_full
_filter <$janitor_log >$tmp.tmp

# report the Killing lines, then the Compressing lines, each group
# sorted (presumably because their order in the log is not
# deterministic), then everything else in its original order
for tag in Killing Compressing
do
    grep $tag <$tmp.tmp | LC_COLLATE=POSIX sort
done
grep -v Killing <$tmp.tmp | grep -v Compressing

# success, all done
exit
|