1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
|
#!/usr/bin/env python
#
# llcstat.py Summarize cache references and cache misses by PID.
# Cache reference and cache miss are corresponding events defined in
# uapi/linux/perf_event.h, it varies to different architecture.
# On x86-64, they mean LLC references and LLC misses.
#
# For Linux, uses BCC, eBPF. Embedded C.
#
# SEE ALSO: perf top -e cache-misses -e cache-references -a -ns pid,cpu,comm
#
# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support).
#
# Copyright (c) 2016 Facebook, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 19-Oct-2016 Teng Qin Created this.
# 20-Jun-2022 YeZhengMao Added tid info.
from __future__ import print_function
import argparse
from bcc import BPF, PerfType, PerfHWConfig
import signal
from time import sleep
parser = argparse.ArgumentParser(
description="Summarize cache references and misses by PID",
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument(
"-c", "--sample_period", type=int, default=100,
help="Sample one in this many number of cache reference / miss events")
parser.add_argument(
"duration", nargs="?", default=10, help="Duration, in seconds, to run")
parser.add_argument(
"-t", "--tid", action="store_true",
help="Summarize cache references and misses by PID/TID"
)
parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
args = parser.parse_args()
# load BPF program
bpf_text="""
#include <linux/ptrace.h>
#include <uapi/linux/bpf_perf_event.h>
struct key_t {
int cpu;
u32 pid;
u32 tid;
char name[TASK_COMM_LEN];
};
BPF_HASH(ref_count, struct key_t);
BPF_HASH(miss_count, struct key_t);
static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
u64 pid_tgid = bpf_get_current_pid_tgid();
key->cpu = bpf_get_smp_processor_id();
key->pid = pid_tgid >> 32;
key->tid = GET_TID ? (u32)pid_tgid : key->pid;
bpf_get_current_comm(&(key->name), sizeof(key->name));
}
int on_cache_miss(struct bpf_perf_event_data *ctx) {
struct key_t key = {};
get_key(&key);
miss_count.increment(key, ctx->sample_period);
return 0;
}
int on_cache_ref(struct bpf_perf_event_data *ctx) {
struct key_t key = {};
get_key(&key);
ref_count.increment(key, ctx->sample_period);
return 0;
}
"""
bpf_text = bpf_text.replace("GET_TID", "1" if args.tid else "0")
if args.ebpf:
print(bpf_text)
exit()
b = BPF(text=bpf_text)
try:
b.attach_perf_event(
ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CACHE_MISSES,
fn_name="on_cache_miss", sample_period=args.sample_period)
b.attach_perf_event(
ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CACHE_REFERENCES,
fn_name="on_cache_ref", sample_period=args.sample_period)
except Exception:
print("Failed to attach to a hardware event. Is this a virtual machine?")
exit()
print("Running for {} seconds or hit Ctrl-C to end.".format(args.duration))
try:
sleep(float(args.duration))
except KeyboardInterrupt:
signal.signal(signal.SIGINT, lambda signal, frame: print())
miss_count = {}
for (k, v) in b.get_table('miss_count').items():
if args.tid:
miss_count[(k.pid, k.tid, k.cpu, k.name)] = v.value
else:
miss_count[(k.pid, k.cpu, k.name)] = v.value
header_text = 'PID '
format_text = '{:<8d} '
if args.tid:
header_text += 'TID '
format_text += '{:<8d} '
header_text += 'NAME CPU REFERENCE MISS HIT%'
format_text += '{:<16s} {:<4d} {:>12d} {:>12d} {:>6.2f}%'
print(header_text)
tot_ref = 0
tot_miss = 0
for (k, v) in b.get_table('ref_count').items():
try:
if args.tid:
miss = miss_count[(k.pid, k.tid, k.cpu, k.name)]
else:
miss = miss_count[(k.pid, k.cpu, k.name)]
except KeyError:
miss = 0
tot_ref += v.value
tot_miss += miss
# This happens on some PIDs due to missed counts caused by sampling
hit = (v.value - miss) if (v.value >= miss) else 0
if args.tid:
print(format_text.format(
k.pid, k.tid, k.name.decode('utf-8', 'replace'), k.cpu, v.value, miss,
(float(hit) / float(v.value)) * 100.0))
else:
print(format_text.format(
k.pid, k.name.decode('utf-8', 'replace'), k.cpu, v.value, miss,
(float(hit) / float(v.value)) * 100.0))
print('Total References: {} Total Misses: {} Hit Rate: {:.2f}%'.format(
tot_ref, tot_miss, (float(tot_ref - tot_miss) / float(tot_ref)) * 100.0))
|