File: 1483

package info (click to toggle)
pcp 7.1.0-1
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 252,748 kB
  • sloc: ansic: 1,483,656; sh: 182,366; xml: 160,462; cpp: 83,813; python: 24,980; perl: 18,327; yacc: 6,877; lex: 2,864; makefile: 2,738; awk: 165; fortran: 60; java: 52
file content (194 lines) | stat: -rwxr-xr-x 6,865 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#!/bin/sh
# PCP QA Test No. 1483
# look for bad syslog entries
#
# Copyright (c) 2024 Ken McDonell.  All Rights Reserved.
#
# check-group-exclude: pmlogger_check pmlogger_daily
#

# Announce the test.  Without arguments $seq is taken from the script
# name; with arguments a $seq already set by the caller is kept and the
# arguments are appended to the banner.
#
case $# in
    0)
	seq=`basename $0`
	echo "QA output created by $seq"
	;;
    *)
	# use $seq from caller, unless not set
	if [ -z "$seq" ]
	then
	    seq=`basename $0`
	fi
	echo "QA output created by `basename $0` $*"
	;;
esac

# get standard environment, filters and checks
. ./common.product
. ./common.filter
. ./common.check


# This test only reads the systemd journal, so bail out through _notrun
# when journalctl is missing or when $PCPQA_SYSTEMD says systemd is not
# in use.  _notrun is not defined in this file -- presumably it comes
# from the common.* files sourced above.
#
# "command -v" is the POSIX built-in way to probe for an executable;
# which(1) is an optional external program and, if it is absent, the
# probe would fail even with journalctl installed.
#
command -v journalctl >/dev/null 2>&1 || _notrun "no journalctl executable installed"
[ "$PCPQA_SYSTEMD" = no ] && _notrun "we're not using systemd here, so journalctl not useful"

# Exit-time cleanup: go back to $here and remove this test's temporary
# files, i.e. $tmp itself and everything matching $tmp.*
#
# Globals read: here, sudo, tmp -- none of them is assigned in this
# file, presumably they are set by the sourced common.* files.
#
_cleanup()
{
    # with $here unset a bare "cd" would land in $HOME, so skip it
    [ -z "$here" ] || cd "$here"

    # With $tmp empty, "$tmp.*" degenerates into the glob ".*" and
    # rm -rf would then remove every dot-file in the current directory,
    # so only remove anything when $tmp is really set.
    # $sudo stays unquoted on purpose: it may be empty or several words.
    #
    if [ -n "$tmp" ]
    then
	$sudo rm -rf "$tmp" "$tmp".*
    fi
}

# success is the default!  On normal exit (and on HUP, INT, QUIT or
# TERM) run _cleanup and then exit with whatever $status holds at
# that point.
status=0
trap '_cleanup; exit $status' EXIT HUP INT QUIT TERM

# Cull expected lines from journalctl output.
#
# Usage: ... | _filter service
#   $1     - name of the service the journal lines belong to; names
#            matching pmcd, pmie* or pmlogger* get extra per-service
#            culling, any other name gets none
#   stdin  - journalctl output
#   stdout - the lines that were not recognized as expected noise
#
_filter()
{
    # the first block are lines that are OK for any service,
    # then the case ... esac deals with the per-service
    # culling
    #
    # and there is some undiagnosed issue when QA is running that
    # dinks with /var/log/pcp/NOTICES ... triage has failed and
    # the file always ends up with the correct permissions after
    # QA is done
    #
    # Note: each sed command below ends with a backslash-continued
    # line followed by a "#end" comment, so patterns can be appended
    # without having to touch the previous last line.
    #
    sed \
	-e '/^-- Boot .* --$/d' \
	-e '/^-- Reboot --$/d' \
	-e '/^-- No entries --$/d' \
	-e '/^-- Journal begins at /d' \
	-e '/^-- Logs begin at /d' \
	-e '/: End[: ]/d' \
	-e '/error while loading shared libraries: libpcp/d' \
	-e '/rc\[[0-9]*]: .*pmpost:.* cannot open .*NOTICES/d' \
	-e '/rc\[[0-9]*]: .*pmpost: unposted message:/d' \
    | case "$1"
    in

	pmcd)
	    sed \
		-e '/ Installing .* PMDA /d' \
		-e '/ Removing .* PMDA /d' \
		-e '/ Rebuilding PMNS /d' \
		-e '/pmcd\[[0-9]*]: .* pmdaopenmetrics([0-9]*) Info:/d' \
		-e '/pmcd\[[0-9]*]: .* pmdaopentelemetry([0-9]*) Info:/d' \
		-e '/pmcd\[[0-9]*]: \.*$/d' \
		-e '/pmcd\[[0-9]*]: .*\.\.done$/d' \
		-e '/pmcd\[[0-9]*]: Terminated/d' \
		-e '/root\[[0-9]*]: pmcd_wait failed in /d' \
		-e '/pmcd\[[0-9]*]: _pmda_setup: Interrupted!/d' \
		-e '/pmcd\[[0-9]*]: _pmda_setup_cleanup: reset \.NeedInstall/d' \
		-e '/pmcd\[[0-9]*]: .*\/pmcd: .* cannot start pmcd/d' \
		-e '/systemctl\[[0-9]*]: .* pmcd\.service changed on disk/d' \
	    #end
	    ;;

	pmie*)
	    # cull regular pmie rule firing ....
	    # ... pcp-pmie[3330341]: Severe ...
	    # and these lines from qa/115
	    # ... rc[24566]: /etc/init.d/rc:
	    # ... rc[24566]: Error: PCP inference engine control file $PCP_PMIECONTROL_PATH ("/etc/pcp/pmie/control")
	    # ... rc[24566]:        is missing!  Cannot start any Performance Co-Pilot inference engine(s).
	    # ... rc[96813]: /etc/pcp/pmie/rc: Warning: Performance Co-Pilot Inference Engine (pmie) not permanently enabled.
	    # ... rc[96813]:     To enable pmie, run the following as root:
	    # ... rc[96813]:     # /bin/systemctl enable pmie.service
	    # and qa/575 seems capable of tripping this one
	    # ... pmie_farm[3016551]: End:
	    # and pmie_check will fail during PCP builds
	    # and this strange one but only on vm03
	    # ... vm03.localdomain pmiectl[1013253]: mount: write error
	    # and then just random QA noise
	    #
	    # The third pattern splices $PCP_SERVICES_DIR into a /.../
	    # address; the embedded echo | sed rewrites every "/" in the
	    # directory name as "\/" so it cannot end the address early.
	    #

	    sed \
		-e '/ pcp-pmie\[/d' \
		-e '/rc\[[0-9]*]: .*\/rc:$/d' \
		-e '/pmie\[[0-9]*]: '"`echo "$PCP_SERVICES_DIR" | sed -e 's@/@\\\\/@g'`"'\/pmie:$/d' \
		-e '/rc\[[0-9]*]: Error: .* \$PCP_PMIECONTROL_PATH/d' \
		-e '/pmie\[[0-9]*]: Error: .* \$PCP_PMIECONTROL_PATH/d' \
		-e '/rc\[[0-9]*]: .*is missing!/d' \
		-e '/pmie\[[0-9]*]: .*is missing!/d' \
		-e '/rc\[[0-9]*]: .*not permanently enabled/d' \
		-e '/pmie\[[0-9]*]: .*not permanently enabled/d' \
		-e '/rc\[[0-9]*]: .*run the following as root:/d' \
		-e '/pmie\[[0-9]*]: .*run the following as root:/d' \
		-e '/rc\[[0-9]*]: .*systemctl enable pmie\.service/d' \
		-e '/pmie\[[0-9]*]: .*systemctl enable pmie\.service/d' \
		-e '/rc\[[0-9]*]: Terminated$/d' \
		-e '/pmie_farm\[[0-9]*]: End:/d' \
		-e '/pmie_check failed - see .*\/pmie_check.log/d' \
		-e '/pmie_daily failed - see .*\/pmie_daily.log/d' \
		-e '/pmiectl\[[0-9]*]: .* (localhost) defined multiple times,/d' \
		-e '/ pmiectl\[[0-9]*]: mount: write error/d' \
		-e '/pcp\[[0-9]*]: pmie_check start failed in/d' \
		-e "/pmiectl\[[0-9]*]: sed: couldn't flush stdout/d" \
	    # end
	    ;;

	pmlogger*)
	    # sudo babble
	    # and pmlogger_check will fail during PCP builds
	    # and pmlogger_daily will fail during PCP builds
	    # and pmlogger_janitor will fail during PCP builds
	    # and qa/1210 and qa/1213
	    # and lock collision with pmlogctl from pmlogger_farm_check
	    # and this strange one but only on bozo and vm03
	    # ... bozo.localdomain pmlogctl[1013253]: mount: write error
	    # (the " pmlogctl[...]: mount: write error" pattern is not
	    # tied to a host name, so it covers both of them)
	    # and fallout from qa/1213 if pmlogger_farm_check goes off
	    # concurrently
	    # and then just random QA noise
	    #
	    # NOTE(review): "pmlogger_check_failed in" (with an underscore
	    # before "failed") looks like a typo -- the matching patterns
	    # for the other services are "pmcd_wait failed in" and
	    # "pmie_check start failed in" -- so the space separated
	    # spellings are culled as well; confirm the exact message text
	    # against the script that logs it.
	    sed \
		-e '/sudo\[[0-9]*]: /d' \
		-e '/pmlogger_check failed - see .*\/pmlogger_check.log/d' \
		-e '/pmlogger_daily failed - see .*\/pmlogger_daily.log/d' \
		-e '/pmlogger_daily failed - see .*\/pmlogger_daily-K.log/d' \
		-e '/pmlogger_janitor failed - see .*\/pmlogger_janitor.log/d' \
		-e '/pmlogctl\[[0-9]*]: .* failed to start for host no\.such\.host\.pcp\.io/d' \
		-e '/pmlogctl\[[0-9]*]: .* is another pmlogctl job running concurrently?/d' \
		-e '/pmlogctl\[[0-9]*]: .*\/pmlogger\/lock$/d' \
		-e '/pmlogctl\[[0-9]*]: .* failed to acquire exclusive lock/d' \
		-e '/pmlogctl\[[0-9]*]: [0-9][0-9]* pmlogctl/d' \
		-e '/pmlogctl\[[0-9]*]: Terminated$/d' \
		-e '/ pmlogctl\[[0-9]*]: mount: write error/d' \
		-e '/pmlogctl\[[0-9]*]: .* (localhost) defined multiple times,/d' \
		-e '/rc\[[0-9]*]: Terminated$/d' \
		-e '/pmlogger\[[0-9]*]: Terminated$/d' \
		-e '/rc\[[0-9]*]: .*not permanently enabled\./d' \
		-e '/rc\[[0-9]*]: .*run the following as root:/d' \
		-e '/rc\[[0-9]*]: .*systemctl enable pmlogger\.service/d' \
		-e '/rc\[[0-9]*]: .*\/pmsignal: .* No such process/d' \
		-e '/pcp\[[0-9]*]: pmlogger_check_failed in/d' \
		-e '/pcp\[[0-9]*]: pmlogger_check failed in/d' \
		-e '/pcp\[[0-9]*]: pmlogger_check start failed in/d' \
	    # end
	    ;;

	*)
	    # no per-service culling for anything else
	    cat
	    ;;
    esac
}

# real QA test starts here

# For each PCP systemd unit print a "=== name ===" banner followed by
# whatever journal lines are left once _filter has culled the expected
# ones.
#
for svc in pmcd pmfind \
	   pmie pmie_check pmie_daily pmie_farm pmie_farm_check \
	   pmlogger pmlogger_check pmlogger_daily pmlogger_farm pmlogger_farm_check \
	   pmproxy
do
    printf '\n=== %s ===\n' "$svc"
    # want entries for the past 24 hours; the --since timestamp is
    # passed as year-month-day hour:minute:second (i.e. not in the
    # USA month/day order)
    #
    $sudo journalctl \
	--no-pager \
	--since="$(pmdate -1d '%Y-%m-%d %H:%M:%S')" \
	_SYSTEMD_UNIT=$svc.service 2>&1 \
    | _filter $svc
done

# additional diagnostics for stuff we don't understand!
#
# on ubuntu1804-container in CI
# Mar 04 19:39:45 cd959fdf2242 pmcd[1179757]: /usr/lib/pcp/bin/pcp-reboot-init: 44: [: -ne: unexpected operator
#
# so append to $seq_full which id(1) gets used and the numeric user id
# it reports, both directly and when run via $sudo
#
{
    which id
    id -u
    $sudo id -u
} >>$seq_full 2>&1

# success, all done
exit