1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412
|
#!/bin/bash
# perf record tests (exclusive)
# SPDX-License-Identifier: GPL-2.0
# Abort the script on the first unhandled command failure.
set -e
# Directory containing this script; the helper libraries are sourced relative to it.
shelldir=$(dirname "$0")
# shellcheck source=lib/waiting.sh
. "${shelldir}"/lib/waiting.sh
# shellcheck source=lib/perf_has_symbol.sh
. "${shelldir}"/lib/perf_has_symbol.sh
# Symbol names that the tests grep for in perf report output.
testsym="test_loop"
testsym2="brstack"
# NOTE(review): helper from lib/perf_has_symbol.sh; presumably it skips the
# whole script when perf does not contain the given symbol - confirm there.
skip_test_missing_symbol ${testsym}
skip_test_missing_symbol ${testsym2}
# Overall result: set to 1 by any failing test and used as the exit status.
err=0
# Temporary files for recorded data and captured output; removed by cleanup().
perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
script_output=$(mktemp /tmp/__perf_test.perf.data.XXXXX.script)
# Workload command line recorded by the tests (expanded unquoted so it splits into words).
testprog="perf test -w thloop"
# Inputs of test_branch_counter: glob of PMU sysfs directories (expanded
# unquoted), capability file path appended to each directory, and the strings
# expected in the perf report and perf script output.
cpu_pmu_dir="/sys/bus/event_source/devices/cpu*"
br_cntr_file="/caps/branch_counter_nr"
br_cntr_output="branch stack counters"
br_cntr_script_output="br_cntr: A"
# Soft limit of open file descriptors at startup; restored after the tests ran.
default_fd_limit=$(ulimit -Sn)
# With option --threads=cpu the number of open file descriptors should be
# equal to the sum of:
#   - nmb_cpus * nmb_events (2 + dummy),
#   - nmb_threads for perf.data.n (equal to nmb_cpus), and
#   - 2*nmb_cpus pipes = 4*nmb_cpus descriptors (each pipe has 2 ends).
# Altogether it needs 8*nmb_cpus file descriptors; some more are also used
# outside of testing, thus the limit is raised to 16*nmb_cpus.
min_fd_limit=$(($(getconf _NPROCESSORS_ONLN) * 16))
# Delete the temporary files created by this script (the perf data file, its
# ".old" sibling and the captured output file) and reset the EXIT, TERM and
# INT traps to their defaults.
# Globals: perfdata, script_output (read)
cleanup() {
  local tmpfile

  for tmpfile in "${perfdata}" "${perfdata}.old" "${script_output}"; do
    rm -f "${tmpfile}"
  done

  trap - EXIT TERM INT
}
# Trap handler: print the name of the calling frame (FUNCNAME[1]), run
# cleanup and terminate the script with exit status 1.
trap_cleanup() {
  local where="${FUNCNAME[1]}"

  printf 'Unexpected signal in %s\n' "${where}"
  cleanup
  exit 1
}
# Run trap_cleanup whenever the shell exits or receives SIGTERM/SIGINT;
# cleanup() resets these traps again.
trap trap_cleanup EXIT TERM INT
# Exercise 'perf record --per-thread' twice: with a workload launched by perf
# itself, and attached (-p) to a workload already running in the background.
# Globals:
#   testprog, testsym, perfdata (read)
#   err      (set to 1 on failure)
#   TESTPID  (written: PID of the background workload)
# Outputs:
#   Progress and result messages on stdout.
test_per_thread() {
  local record_rc=0

  echo "Basic --per-thread mode test"
  # Probe run: when even a plain record of the workload fails, skip the test.
  # ${testprog} is deliberately unquoted so that it splits into command words.
  if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null
  then
    echo "Per-thread record [Skipped event not supported]"
    return
  fi
  if ! perf record --per-thread -o "${perfdata}" ${testprog} 2> /dev/null
  then
    echo "Per-thread record [Failed record]"
    err=1
    return
  fi
  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
  then
    echo "Per-thread record [Failed missing output]"
    err=1
    return
  fi

  # run the test program in background (for 30 seconds)
  ${testprog} 30 &
  TESTPID=$!

  # Start from a clean slate so the existence check below is meaningful.
  rm -f "${perfdata}"

  # NOTE(review): wait_for_threads comes from lib/waiting.sh; presumably it
  # blocks until the workload has spawned 2 threads - confirm there.
  wait_for_threads "${TESTPID}" 2

  # The script runs under 'set -e': collect the status instead of letting a
  # failed attach abort the script, so that the background workload is always
  # killed and the failure is reported through the normal path.
  perf record -p "${TESTPID}" --per-thread -o "${perfdata}" sleep 1 2> /dev/null ||
    record_rc=$?
  # The workload may already have exited; that must not abort the script.
  kill "${TESTPID}" 2> /dev/null || true

  if [ "${record_rc}" -ne 0 ] || [ ! -e "${perfdata}" ]
  then
    echo "Per-thread record [Failed record -p]"
    err=1
    return
  fi
  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
  then
    echo "Per-thread record [Failed -p missing output]"
    err=1
    return
  fi
  echo "Basic --per-thread mode test [Success]"
}
# Check that 'perf record --intr-regs' captures the requested registers.
# The test is skipped when perf does not list the br_inst_retired.near_call
# event or does not report the expected register set; it sets err=1 when no
# "DI:" value shows up in the perf script output.
# Globals: testprog (read), err (written)
test_register_capture() {
  local event='br_inst_retired.near_call'
  local reg_list='available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15'

  echo "Register capture test"

  if ! perf list pmu | grep -q "${event}"; then
    echo "Register capture test [Skipped missing event]"
    return
  fi

  # '--intr-regs=?' makes perf print the registers it can sample.
  if ! perf record --intr-regs=\? 2>&1 | grep -q "${reg_list}"; then
    echo "Register capture test [Skipped missing registers]"
    return
  fi

  # Record to stdout and feed the stream straight into perf script.
  # ${testprog} is deliberately unquoted so that it splits into command words.
  if ! perf record -o - --intr-regs=di,r8,dx,cx -e "${event}" \
      -c 1000 --per-thread ${testprog} 2> /dev/null \
      | perf script -F ip,sym,iregs -i - 2> /dev/null \
      | grep -q "DI:"; then
    echo "Register capture test [Failed missing output]"
    err=1
    return
  fi

  echo "Register capture test [Success]"
}
# Record the workload system-wide (-a), first with default settings and then
# with the cpu-clock and cs events using --threads=cpu, and check that the
# workload symbol shows up in the report each time.
# A failure of the very first record is treated as "not supported" (skip).
# Globals: perfdata, testprog, testsym (read), err (written)
test_system_wide() {
  echo "Basic --system-wide mode test"

  # ${testprog} is deliberately unquoted so that it splits into command words.
  if ! perf record -aB --synth=no -o "${perfdata}" ${testprog} 2> /dev/null; then
    echo "System-wide record [Skipped not supported]"
    return
  fi

  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"; then
    echo "System-wide record [Failed missing output]"
    err=1
    return
  fi

  if ! perf record -aB --synth=no -e cpu-clock,cs --threads=cpu \
      -o "${perfdata}" ${testprog} 2> /dev/null; then
    echo "System-wide record [Failed record --threads option]"
    err=1
    return
  fi

  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"; then
    echo "System-wide record [Failed --threads missing output]"
    err=1
    return
  fi

  echo "Basic --system-wide mode test [Success]"
}
# Record the workload as a plain perf target, first with default settings and
# then with the cpu-clock and cs events using --threads=package, and check
# that the workload symbol shows up in the report each time.
# Globals: perfdata, testprog, testsym (read), err (written)
test_workload() {
  echo "Basic target workload test"

  # ${testprog} is deliberately unquoted so that it splits into command words.
  if ! perf record -o "${perfdata}" ${testprog} 2> /dev/null; then
    echo "Workload record [Failed record]"
    err=1
    return
  fi

  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"; then
    echo "Workload record [Failed missing output]"
    err=1
    return
  fi

  if ! perf record -e cpu-clock,cs --threads=package \
      -o "${perfdata}" ${testprog} 2> /dev/null; then
    echo "Workload record [Failed record --threads option]"
    err=1
    return
  fi

  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"; then
    echo "Workload record [Failed --threads missing output]"
    err=1
    return
  fi

  echo "Basic target workload test [Success]"
}
# Record branch stacks together with branch counters (-j any,counter) and
# check that both perf report -D and perf script show the counter output.
# The test is skipped unless every directory matched by ${cpu_pmu_dir}
# contains the ${br_cntr_file} capability file.
# Globals: cpu_pmu_dir, br_cntr_file, br_cntr_output, br_cntr_script_output,
#          perfdata, testprog (read), err (written)
test_branch_counter() {
  echo "Branch counter test"

  # Check if the branch counter feature is supported.
  # ${cpu_pmu_dir} is deliberately unquoted so that its glob gets expanded.
  for dir in ${cpu_pmu_dir}; do
    if [[ ! -e "${dir}${br_cntr_file}" ]]; then
      echo "branch counter feature not supported on all core PMUs (${dir}) [Skipped]"
      return
    fi
  done

  # ${testprog} is deliberately unquoted so that it splits into command words.
  if ! perf record -o "${perfdata}" -e "{branches:p,instructions}" \
      -j any,counter ${testprog} 2> /dev/null; then
    echo "Branch counter record test [Failed record]"
    err=1
    return
  fi

  if ! perf report -i "${perfdata}" -D -q | grep -q "${br_cntr_output}"; then
    echo "Branch counter report test [Failed missing output]"
    err=1
    return
  fi

  if ! perf script -i "${perfdata}" -F +brstackinsn,+brcntr \
      | grep -q "${br_cntr_script_output}"; then
    echo " Branch counter script test [Failed missing output]"
    err=1
    return
  fi

  echo "Branch counter test [Success]"
}
# Record system-wide with cgroup information (--all-cgroups) and check that
# the raw dump contains "CGROUP" and that perf script prints at least one
# cgroup field line that does not contain "unknown".
# A failing record is treated as "not supported" (skip).
# Globals: perfdata, testprog (read), err (written)
test_cgroup() {
  echo "Cgroup sampling test"

  # ${testprog} is deliberately unquoted so that it splits into command words.
  if ! perf record -aB --synth=cgroup --all-cgroups -o "${perfdata}" \
      ${testprog} 2> /dev/null; then
    echo "Cgroup sampling [Skipped not supported]"
    return
  fi

  if ! perf report -i "${perfdata}" -D | grep -q "CGROUP"; then
    echo "Cgroup sampling [Failed missing output]"
    err=1
    return
  fi

  # 'grep -v' succeeds as soon as one line without "unknown" is seen.
  if ! perf script -i "${perfdata}" -F cgroup | grep -q -v "unknown"; then
    echo "Cgroup sampling [Failed cannot resolve cgroup names]"
    err=1
    return
  fi

  echo "Cgroup sampling test [Success]"
}
# Record system-wide restricted to the current user (--uid) and check that
# the workload symbol shows up in the report. The output of perf record is
# captured in ${script_output}; when the record fails and that output matches
# "libbpf.*EPERM" the test is skipped instead of failed.
# Globals: perfdata, script_output, testprog, testsym (read), err (written)
test_uid() {
  echo "Uid sampling test"

  # ${testprog} is deliberately unquoted so that it splits into command words.
  if ! perf record -aB --synth=no --uid "$(id -u)" -o "${perfdata}" ${testprog} \
      > "${script_output}" 2>&1; then
    if grep -q "libbpf.*EPERM" "${script_output}"; then
      echo "Uid sampling [Skipped permissions]"
      return
    fi
    echo "Uid sampling [Failed to record]"
    err=1
    # cat "${script_output}"
    return
  fi

  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"; then
    echo "Uid sampling [Failed missing output]"
    err=1
    return
  fi

  echo "Uid sampling test [Success]"
}
# Leader sampling test: record the event group "{cycles,cycles}:Su" on the
# brstack workload and verify that the two cycles values of each sample pair
# are equal, tolerating a limited share of mismatches.
# Globals:
#   perfdata, script_output (read)
#   err (set to 1 on failure)
# Outputs:
#   Progress and result messages on stdout; the "brstack" lines of the perf
#   script output are stored in ${script_output}.
test_leader_sampling() {
  local line cycles prev_cycles=""
  local -i index=0 valid_counts=0 invalid_counts=0 total_counts=0
  # Percentage of the tolerance; mismatches must stay below 100 - tolerance_pct.
  local -i tolerance_pct=80

  echo "Basic leader sampling test"
  if ! perf record -o "${perfdata}" -e "{cycles,cycles}:Su" -- \
    perf test -w brstack 2> /dev/null
  then
    echo "Leader sampling [Failed record]"
    err=1
    return
  fi

  # grep exits non-zero when no line matches. The script runs under 'set -e',
  # so tolerate that here; the empty result is reported as
  # "[No sample generated]" further down.
  perf script -i "${perfdata}" | grep brstack > "${script_output}" || true

  # Check if the two instruction counts are equal in each record.
  # However, the throttling code doesn't consider event grouping. During throttling, only the
  # leader is stopped, causing the slave's counts significantly higher. To temporarily solve this,
  # let's set the tolerance rate to 80%.
  # TODO: Revert the code for tolerance once the throttling mechanism is fixed.
  while IFS= read -r line
  do
    # The field in front of "cycles:" is the counter value of the sample.
    cycles=$(awk '{for(i=1;i<=NF;i++) if($i=="cycles:") print $(i-1)}' <<< "${line}")
    # Lines come in pairs: every second line (odd index) is compared with the
    # line before it; all other lines count as valid.
    if (( index % 2 != 0 )) && [[ "${cycles}" != "${prev_cycles}" ]]
    then
      invalid_counts=$((invalid_counts + 1))
    else
      valid_counts=$((valid_counts + 1))
    fi
    index=$((index + 1))
    prev_cycles=${cycles}
  done < "${script_output}"

  total_counts=$((invalid_counts + valid_counts))
  if (( total_counts <= 0 ))
  then
    echo "Leader sampling [No sample generated]"
    err=1
    return
  fi

  # Integer percentage of mismatching lines, truncated towards zero.
  if (( invalid_counts * 100 / total_counts >= 100 - tolerance_pct ))
  then
    echo "Leader sampling [Failed inconsistent cycles count]"
    err=1
  else
    echo "Basic leader sampling test [Success]"
  fi
}
# Check that perf record accepts the event group
# "{instructions,slots,topdown-retiring}:S". The test is skipped when
# perf stat cannot run the "{slots,topdown-retiring}" group at all.
# Globals: perfdata (read), err (written)
test_topdown_leader_sampling() {
  echo "Topdown leader sampling test"

  if ! perf stat -e "{slots,topdown-retiring}" true 2> /dev/null; then
    echo "Topdown leader sampling [Skipped event parsing failed]"
    return
  fi

  if ! perf record -o "${perfdata}" \
      -e "{instructions,slots,topdown-retiring}:S" true 2> /dev/null; then
    echo "Topdown leader sampling [Failed topdown events not reordered correctly]"
    err=1
    return
  fi

  echo "Topdown leader sampling test [Success]"
}
# Check the ":P" (precise_max) modifier on the cycles and instructions events.
# For each event the plain event is recorded first; when that fails, the ":P"
# check for this event is skipped, and when both events are skipped the whole
# test counts as skipped. A failing ":P" record sets err=1.
# Globals: perfdata (read), err (written)
test_precise_max() {
  local -i skipped=0

  echo "precise_max attribute test"

  # Just to make sure event cycles is supported for sampling
  if ! perf record -o "${perfdata}" -e "cycles" true 2> /dev/null; then
    echo "precise_max attribute [Skipped no cycles:P event]"
    skipped=$((skipped + 1))
  elif ! perf record -o "${perfdata}" -e "cycles:P" true 2> /dev/null; then
    echo "precise_max attribute [Failed cycles:P event]"
    err=1
    return
  fi

  # On s390 event instructions is not supported for perf record.
  # On AMD, cycles and instructions events are treated differently.
  if ! perf record -o "${perfdata}" -e "instructions" true 2> /dev/null; then
    echo "precise_max attribute [Skipped no instructions:P event]"
    skipped=$((skipped + 1))
  elif ! perf record -o "${perfdata}" -e "instructions:P" true 2> /dev/null; then
    echo "precise_max attribute [Failed instructions:P event]"
    err=1
    return
  fi

  if (( skipped == 2 )); then
    echo "precise_max attribute [Skipped no hardware events]"
  else
    echo "precise_max attribute test [Success]"
  fi
}
# Record the brstack workload with call graphs and check that the report
# mentions ${testsym2}. On s390x the recording uses
# "--call-graph dwarf -e cpu-clock", on every other machine "-g".
# Globals:
#   perfdata, testsym2 (read)
#   err (set to 1 on failure)
# Outputs:
#   Progress and result messages on stdout; the output of perf record and the
#   report lines matching ${testsym2} are not suppressed.
test_callgraph() {
  # Kept as a local array so the flags neither leak into the global scope nor
  # depend on unquoted word splitting.
  local -a cmd_flags

  echo "Callgraph test"

  case "$(uname -m)" in
    s390x)
      cmd_flags=(--call-graph dwarf -e cpu-clock)
      ;;
    *)
      cmd_flags=(-g)
      ;;
  esac

  if ! perf record -o "${perfdata}" "${cmd_flags[@]}" perf test -w brstack
  then
    echo "Callgraph test [Failed missing output]"
    err=1
    return
  fi

  if ! perf report -i "${perfdata}" 2>&1 | grep "${testsym2}"
  then
    echo "Callgraph test [Failed missing symbol]"
    err=1
    return
  fi

  echo "Callgraph test [Success]"
}
# Raise the soft limit of open file descriptors to the required minimum when
# the current limit is lower.
if [[ $default_fd_limit -lt $min_fd_limit ]]; then
  ulimit -Sn $min_fd_limit
fi

# Run all tests in their fixed order; each one sets err=1 when it fails.
for testcase in \
  test_per_thread \
  test_register_capture \
  test_system_wide \
  test_workload \
  test_branch_counter \
  test_cgroup \
  test_uid \
  test_leader_sampling \
  test_topdown_leader_sampling \
  test_precise_max \
  test_callgraph
do
  "${testcase}"
done

# Restore the soft limit saved at startup, remove the temporary files and
# report the collected result as the exit status.
ulimit -Sn $default_fd_limit
cleanup
exit $err
|