File: 1201

package info (click to toggle)
pcp 7.0.5-1
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 252,916 kB
  • sloc: ansic: 1,478,844; sh: 177,285; xml: 160,462; cpp: 83,809; python: 24,349; perl: 18,303; yacc: 6,877; lex: 2,864; makefile: 2,694; awk: 165; fortran: 60; java: 52
file content (188 lines) | stat: -rwxr-xr-x 5,152 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/bin/sh
# PCP QA Test No. 1201
# pmlogger_check for primary logger and pmcd may not be running (yet)
# - systemd and package install failure case
#
# This is the multiple pmlogger version.  See qa/1200 for the single
# pmlogger version.
#
# Copyright (c) 2020 Ken McDonell.  All Rights Reserved.
#

# The test's sequence number is taken from this script's own file name
# and echoed as the first line of the test output.
seq=`basename $0`
echo "QA output created by $seq"

# get standard environment, filters and checks
# (the paths are relative, so the script has to be started from the
# directory that holds the common.* files)
. ./common.product
. ./common.filter
. ./common.check

# Exit handler, run from the trap on exit and on signals.
#
# Returns to $here.  If the test got far enough to change the system
# ($_needclean is "true") it removes the pmlogger control file installed
# for $remote, resets PMLOGGER_CHECK_SKIP_JANITOR to "no", restarts pmcd
# and pmlogger through the _service wrapper and clears _needclean.
# Finally it removes this test's temporary files.
#
# Globals read: here, _needclean, remote, sudo, tmp,
#               PCP_PMLOGGERCONTROL_PATH
#
_cleanup()
{
    cd $here
    # The trap is armed before _needclean is first assigned.  An unset
    # variable would expand to an empty command (exit status 0) and so
    # trigger the restarts below; default it to "false" instead.
    if ${_needclean:-false}
    then
	[ -f ${PCP_PMLOGGERCONTROL_PATH}.d/$remote ] \
	    && $sudo rm -f ${PCP_PMLOGGERCONTROL_PATH}.d/$remote
	# undo the PMLOGGER_CHECK_SKIP_JANITOR=yes override that the test
	# body exports before it starts pmlogger
	export PMLOGGER_CHECK_SKIP_JANITOR=no
	_service pmcd restart 2>&1 | _filter_pcp_restart
	_wait_for_pmcd
	_service pmlogger restart 2>&1 | _filter_pcp_restart
	_wait_for_pmlogger
	_needclean=false
    fi
    # With an empty $tmp the second argument would turn into the glob
    # ".*" and match dot-files in the current directory, so only remove
    # anything when $tmp is really set.
    if [ -n "$tmp" ]
    then
	$sudo rm -rf $tmp $tmp.*
    fi
}

# Adapted from _wait_for_pmlogger.
#
# Polls the primary pmlogger with "pmlc -P" up to $_maxdelay times, one
# second apart.  Prints nothing when pmlogger answers; otherwise prints
# a timestamp, an "Oops" line, the pmlogger log (or a note that it was
# not created) and the output of the last pmlc attempt.
#
# Globals read: sudo, PCP_USER, PCP_ARCHIVE_DIR, tmp
# Globals set:  _maxdelay, _dir_hostname, _logfile, _i, _dead
#
_my_wait_for_pmlogger()
{
    # 6 attempts seems like a reasonable upper bound for pmlogger to get going
    _maxdelay=6
    _dir_hostname=$(hostname || echo localhost)
    _logfile="$PCP_ARCHIVE_DIR/$_dir_hostname/pmlogger.log"

    _dead=true
    _i=0
    until [ $_i -ge $_maxdelay ]
    do
	# any pmlc outcome other than a refused or unconnected socket
	# is taken to mean the pmlogger socket has been set up
	if ! $sudo -u $PCP_USER pmlc -P </dev/null 2>&1 \
		| tee $tmp.err \
		| grep -E "Connection refused|Transport endpoint is not connected" >/dev/null
	then
	    _dead=false
	    # let pmlogger notice that pmlc has gone away so the port
	    # is free again
	    sleep 1
	    break
	fi
	sleep 1
	_i=$((_i + 1))
    done

    # pmlogger answered, nothing to report
    $_dead || return 0

    echo "now: $(date)"
    echo "Oops ... primary pmlogger failed to start after $_maxdelay seconds"
    echo "pmlogger log ($_logfile) ..."
    if [ ! -f $_logfile ]
    then
	echo "Not created ... this is good as it means pmlogger_check noticed"
    else
	cat $_logfile
    fi
    echo "pmlc attempt ..."
    [ -f $tmp.err ] && cat $tmp.err
}

# Make the output of _my_wait_for_pmlogger deterministic.
#
# stdin is appended unmodified to $seq_full; on the copy sent to stdout
# everything after "now:" becomes DATE, the first occurrence per line of
# $PCP_ARCHIVE_DIR and of $_dir_hostname is replaced by a fixed token,
# and the result is passed through _filter_pmlogger_log.
#
_filter()
{
    tee -a $seq_full \
    | sed -e "/^now: /s/ .*/ DATE/
	s@$PCP_ARCHIVE_DIR@PCP_ARCHIVE_DIR@
	s@$_dir_hostname@HOSTNAME@" \
    | _filter_pmlogger_log
}

status=1	# failure is the default!
# On normal exit (0) and on signals 1, 2, 3 and 15: run _cleanup, then
# exit with $status.  The "\$" defers expansion of status until the trap
# fires, so the value in effect at exit time is the one that is used.
trap "_cleanup; exit \$status" 0 1 2 3 15

# Ask the getpmcdhosts helper for a remote host running pmcd; the test
# is not run without one.  (Presumably -n 1 limits the answer to a
# single host and -L excludes the local host -- confirm against
# getpmcdhosts.)
remote=`./getpmcdhosts -L -n 1`
[ -z "$remote" ] && _notrun "Cannot find remote host running pmcd"
echo "remote=\"$remote\"" >>$seq_full

# from here on the system gets modified, so _cleanup has work to do
_needclean=true
# NOTE(review): myhost is not referenced anywhere else in the visible
# part of this script
myhost=`hostname`

# pmlogger configuration for the additional pmlogger that logs $remote
cat <<End-of-File >$tmp.config
log mandatory on once { pmcd }
log advisory on default { kernel.all.cpu }
End-of-File

# pmlogger control file entry for $remote.  "\$version" is escaped so a
# literal "$version=1.1" line is written, while $seq, `date`, $remote
# and $tmp are expanded now.  PCP_ARCHIVE_DIR carries no "$" and is
# therefore written literally (presumably substituted later when the
# control file is processed -- confirm).  The two "n" fields presumably
# mark this pmlogger as non-primary and not using socks -- confirm.
cat <<End-of-File >$tmp.control
# Installed by PCP QA test $seq on `date`
\$version=1.1
$remote         n   n   PCP_ARCHIVE_DIR/$remote          -c $tmp.config
End-of-File

# install the entry, named after the remote host, in the control ".d"
# directory; _cleanup removes the same path again
$sudo cp $tmp.control ${PCP_PMLOGGERCONTROL_PATH}.d/$remote

# real QA test starts here
# Record the pmcd/pmlogger processes present before anything is changed.
# The [P] and [p] bracket expressions keep grep from matching its own
# command line in the process listing.
echo "[`date`] initially" >>$seq_full
$PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p](mcd|mlogger)' >>$seq_full

# stop pmcd, and all pmloggers
#
# pmlogger is stopped first, then pmcd, each followed by a wait for the
# processes to be gone.
# NOTE(review): each "if ... fi" below is the left-hand side of a
# pipeline, so the "_exit 1" inside it would normally run in a subshell
# -- confirm that a failed stop really aborts the test here.
echo "[`date`] pcp stop" >>$seq_full
if ! _service pmlogger stop 2>&1; then _exit 1; fi \
| _filter_pcp_stop
_wait_pmlogger_end || _exit 1
if ! _service pmcd stop 2>&1; then _exit 1; fi \
| _filter_pcp_stop
_wait_pmcd_end || _exit 1
# process snapshot after the stop, for the $seq_full record
$PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p](mcd|mlogger)' >>$seq_full

# from here on, don't use any "_service" wrapper ... we need to dodge
# any linking of the services and starting stuff under the covers
#
# Same host name and log path that _my_wait_for_pmlogger computes; they
# are set here as well because _filter reads $_dir_hostname in this
# shell, and the assignments made inside the piped function call below
# are not visible to it.
_dir_hostname=`hostname || echo localhost`
_logfile="$PCP_ARCHIVE_DIR/$_dir_hostname/pmlogger.log" 
# remove any earlier log so that a log reported afterwards can only come
# from the start attempt below
$sudo rm -f $_logfile
echo "[`date`]" >>$seq_full
echo "pmcd not running, expect this to timeout" | tee -a $seq_full
# run the pmlogger rc script directly, with pmcd still stopped
$sudo $PCP_RC_DIR/pmlogger start 2>&1 | _filter_pcp_start
# the timeout diagnostics are part of the test output, hence no
# "|| _exit" here; _filter makes them deterministic
_my_wait_for_pmlogger | _filter
echo "[`date`]" >>$seq_full
$PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p](mcd|mlogger)' >>$seq_full

# now bring up pmcd through its rc script and wait until it is ready;
# the test is abandoned if that fails
$sudo $PCP_RC_DIR/pmcd start 2>&1 | _filter_pcp_start
_wait_for_pmcd || _exit 1

# the "rc" pmlogger script calls pmlogger_check and now pmlogger_check
# uses pmlogger_janitor to kill off any "lost" pmlogger's, we don't want
# pmlogger_janitor to kill the primary pmlogger we're about to start ...
#
# (_cleanup sets this variable back to "no")
export PMLOGGER_CHECK_SKIP_JANITOR=yes

# second attempt, this time with pmcd available: start from a clean log
# again and require pmlogger to come up
$sudo rm -f $_logfile
echo "[`date`]" >>$seq_full
echo "pmcd running, expect this to work" | tee -a $seq_full
$sudo $PCP_RC_DIR/pmlogger start 2>&1 | _filter_pcp_start
_wait_for_pmlogger || _exit 1
echo "[`date`]" >>$seq_full
# process snapshot after both services were started, for $seq_full
$PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p](mcd|mlogger)' >>$seq_full
# Fetch the pmcd.pmlogger.pmcd_host values from pmcd; one copy goes to
# $seq_full, the other to $tmp.tmp for the checks below.
pminfo -f pmcd.pmlogger.pmcd_host | tee -a $seq_full >$tmp.tmp

# an instance named "primary" has to be present
if ! grep '"primary"' $tmp.tmp >/dev/null
then
    echo "Error: primary pmlogger missing"
    cat $tmp.tmp
    pcp
else
    echo "Found primary pmlogger"
fi

# ... and so does a pmlogger for $remote.
# getpmcdhosts may have returned a FQDN, but hostname() on the remote
# host may return an abbreviated name which is the hostname we see
# in pmcd.pmlogger.pmcd_host, so when the full name is not found the
# name up to the first "." is tried as well.
if grep "\"$remote\"" $tmp.tmp >/dev/null \
    || { r=$(echo $remote | sed -e 's/\..*//'); grep "\"$r\"" $tmp.tmp >/dev/null; }
then
    echo "Found non-primary pmlogger"
else
    echo "Error: non-primary pmlogger missing"
    cat $tmp.tmp
    pcp
fi

# success, all done
status=0
exit