#!/usr/bin/python
# IPC - Instructions Per Cycle, measured using perf events and
# uprobes.
# 24-Apr-2020 Saleem Ahmad Created this.
from bcc import BPF, utils
from optparse import OptionParser
# load BPF program
code="""
#include <uapi/linux/ptrace.h>
struct perf_delta {
    u64 clk_delta;
    u64 inst_delta;
    u64 time_delta;
};
/*
Perf arrays used to read counter values for the perf
events opened from user space.
*/
BPF_PERF_ARRAY(clk, MAX_CPUS);
BPF_PERF_ARRAY(inst, MAX_CPUS);
// Perf Output
BPF_PERF_OUTPUT(output);
// Per-CPU array to store the start values
BPF_PERCPU_ARRAY(data, u64);
#define CLOCK_ID 0
#define INSTRUCTION_ID 1
#define TIME_ID 2
void trace_start(struct pt_regs *ctx) {
    u32 clk_k = CLOCK_ID;
    u32 inst_k = INSTRUCTION_ID;
    u32 time = TIME_ID;
    int cpu = bpf_get_smp_processor_id();
    /*
    perf_read may return negative values on error.
    If the cpu id is greater than the BPF_PERF_ARRAY size,
    the counter value will be a very large negative number.
    NOTE: bpf_perf_event_value is recommended over
    bpf_perf_event_read or map.perf_read() due to
    ABI issues; map.perf_read_value() needs to be
    implemented in the future.
    */
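    /*
    A hedged sketch of how such errors could be filtered: perf_read
    reports failure as -errno cast to u64, so a small negative value
    (when viewed as s64) indicates an error. This check is an
    illustration, not part of the original tool:
        u64 v = clk.perf_read(cpu);
        if (((s64)v < 0) && ((s64)v > -4096))
            return; // perf_read failed with -errno
    */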
    u64 clk_start = clk.perf_read(cpu);
    u64 inst_start = inst.perf_read(cpu);
    u64 time_start = bpf_ktime_get_ns();
    u64* kptr = NULL;
    kptr = data.lookup(&clk_k);
    if (kptr) {
        data.update(&clk_k, &clk_start);
    } else {
        data.insert(&clk_k, &clk_start);
    }
    kptr = data.lookup(&inst_k);
    if (kptr) {
        data.update(&inst_k, &inst_start);
    } else {
        data.insert(&inst_k, &inst_start);
    }
    kptr = data.lookup(&time);
    if (kptr) {
        data.update(&time, &time_start);
    } else {
        data.insert(&time, &time_start);
    }
}
void trace_end(struct pt_regs* ctx) {
    u32 clk_k = CLOCK_ID;
    u32 inst_k = INSTRUCTION_ID;
    u32 time = TIME_ID;
    int cpu = bpf_get_smp_processor_id();
    // See the note in trace_start about error values from perf_read.
    u64 clk_end = clk.perf_read(cpu);
    u64 inst_end = inst.perf_read(cpu);
    u64 time_end = bpf_ktime_get_ns();
    struct perf_delta perf_data = {};
    u64* kptr = NULL;
    // Look up each start value; if any is missing, return early.
    kptr = data.lookup(&clk_k);
    if (kptr) {
        perf_data.clk_delta = clk_end - *kptr;
    } else {
        return;
    }
    kptr = data.lookup(&inst_k);
    if (kptr) {
        perf_data.inst_delta = inst_end - *kptr;
    } else {
        return;
    }
    kptr = data.lookup(&time);
    if (kptr) {
        perf_data.time_delta = time_end - *kptr;
    } else {
        return;
    }
    output.perf_submit(ctx, &perf_data, sizeof(struct perf_delta));
}
"""
usage = 'Usage: ipc.py [options]\nexample: ./ipc.py -l c -s strlen'
parser = OptionParser(usage)
parser.add_option('-l', '--lib', dest='lib_name', help='lib name containing symbol to trace, e.g. c for libc', type=str)
parser.add_option('-s', '--sym', dest='sym', help='symbol to trace', type=str)
(options, args) = parser.parse_args()
if not options.lib_name or not options.sym:
    parser.print_help()
    exit()
num_cpus = len(utils.get_online_cpus())
b = BPF(text=code, cflags=['-DMAX_CPUS=%s' % str(num_cpus)])
# Attach probes at the entry and return of the traced function.
# NOTE: When attaching to a function like strlen, the dynamic linker may
# resolve it during runtime relocation (IFUNC resolution): the resolver runs
# once and returns the address of an optimized variant, which the process
# then calls instead. e.g. after relocation, __strlen_sse2 is called instead
# of strlen, so the uprobe on strlen no longer fires for those calls.
# NOTE: There is a context switch from user space to kernel space when the
# uprobes fire and the counters are captured, so the measured IPC may differ
# slightly from the true IPC of the function. This example is a reference for
# how to use perf events together with tracing.
b.attach_uprobe(name=options.lib_name, sym=options.sym, fn_name="trace_start")
b.attach_uretprobe(name=options.lib_name, sym=options.sym, fn_name="trace_end")
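# A hedged workaround sketch for the IFUNC note above: bcc's attach_uprobe
# also accepts a sym_re regex, so one could attach to every implementation
# variant at once. The variant names (e.g. __strlen_sse2, __strlen_avx2)
# depend on the libc build, so this is illustrative only:
# b.attach_uprobe(name=options.lib_name,
#                 sym_re='^(%s|__%s)' % (options.sym, options.sym),
#                 fn_name="trace_start")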
def print_data(cpu, data, size):
    e = b["output"].event(data)
    # Guard against a zero clock delta (e.g. a failed counter read).
    ipc = float(e.inst_delta) / e.clk_delta if e.clk_delta else 0.0
    print("%-8d %-12d %-8.2f %-8s %d" % (e.clk_delta, e.inst_delta,
          ipc, str(round(e.time_delta * 1e-3, 2)) + ' us', cpu))
print("Counters Data")
print("%-8s %-12s %-8s %-8s %s" % ('CLOCK', 'INSTRUCTION', 'IPC', 'TIME', 'CPU'))
b["output"].open_perf_buffer(print_data)
# Open a raw perf event for unhalted cycles. The hex value is a
# combination of event, umask and cmask; see the Intel SDM for the
# event encodings, or use `perf list --details` to print event, umask
# and cmask.
# NOTE: The kernel may multiplex events when more counters are requested
# than the CPU's performance monitoring unit supports, which makes the
# raw counts inaccurate; counter values would need to be normalized
# (scaled by the event's enabled/running time) for an accurate value.
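# A normalization sketch (assumption: the enabled/running times are
# available, e.g. via bpf_perf_event_read_value on newer kernels; this
# script does not collect them):
#   scaled_count = raw_count * time_enabled / time_running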
PERF_TYPE_RAW = 4
# Unhalted Clock Cycles
b["clk"].open_perf_event(PERF_TYPE_RAW, 0x0000003C)
# Instructions Retired
b["inst"].open_perf_event(PERF_TYPE_RAW, 0x000000C0)
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()