#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# dcstat   Directory entry cache (dcache) stats.
#          For Linux, uses BCC, eBPF.
#
# USAGE: dcstat [interval [count]]
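#
# Examples (illustrative invocations, matching the argument parsing below):
#     dcstat          # 1-second summaries until Ctrl-C
#     dcstat 5 12     # 5-second summaries, 12 times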
#
# This uses kernel dynamic tracing of kernel functions, lookup_fast() and
# d_lookup(), which will need to be modified to match kernel changes. See
# code comments.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 09-Feb-2016   Brendan Gregg   Created this.

from __future__ import print_function
from bcc import BPF
from ctypes import c_int
from time import sleep, strftime
from sys import argv
def usage():
    print("USAGE: %s [interval [count]]" % argv[0])
    exit()

# arguments
interval = 1
count = -1
if len(argv) > 1:
    try:
        interval = int(argv[1])
        if interval == 0:
            raise
        if len(argv) > 2:
            count = int(argv[2])
    except:  # also catches -h, --help
        usage()

# define BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
enum stats {
    S_REFS = 1,
    S_SLOW,
    S_MISS,
    S_MAXSTAT
};

BPF_ARRAY(stats, u64, S_MAXSTAT);
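// one u64 counter per statistic, indexed by the stats enum above
// (slot 0 is unused since S_REFS starts at 1)
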
/*
 * How this is instrumented, and how to interpret the statistics, is very much
 * tied to the current kernel implementation (this was written on Linux 4.4).
 * This will need maintenance to keep working as the implementation changes. To
 * aid future adventurers, this is what the current code does, and why.
 *
 * First problem: the current implementation takes a path and then does a
 * lookup of each component. So how do we count a reference? Once for the path
 * lookup, or once for every component lookup? I've chosen the latter,
 * since it seems to map more closely to actual dcache lookups (via
 * __d_lookup_rcu()). It's counted via calls to lookup_fast().
 *
 * The implementation tries different, progressively slower, approaches to
 * look up a file. At what point do we call it a dcache miss? I've chosen when
 * a d_lookup() (which is called during lookup_slow()) returns zero.
 *
 * I've also included a "SLOW" statistic to show how often the fast lookup
 * failed. Whether this exists or is interesting is an implementation detail,
 * and the "SLOW" statistic may be removed in future versions.
 */

void count_fast(struct pt_regs *ctx) {
    int key = S_REFS;
    u64 *leaf = stats.lookup(&key);
    if (leaf) (*leaf)++;
}

void count_lookup(struct pt_regs *ctx) {
    int key = S_SLOW;
    u64 *leaf = stats.lookup(&key);
    if (leaf) (*leaf)++;
    if (PT_REGS_RC(ctx) == 0) {
        key = S_MISS;
        leaf = stats.lookup(&key);
        if (leaf) (*leaf)++;
    }
}
"""
# load BPF program
b = BPF(text=bpf_text)
b.attach_kprobe(event="lookup_fast", fn_name="count_fast")
b.attach_kretprobe(event="d_lookup", fn_name="count_lookup")
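# count_fast fires on each call to lookup_fast() (one dcache reference);
# count_lookup fires on return from d_lookup(), where a zero return value
# is counted as a miss (see the comment block in the BPF program above).
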
# stat column labels and indexes
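# (values must match the S_* enum in the BPF program)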
stats = {
    "REFS": 1,
    "SLOW": 2,
    "MISS": 3
}

# header
print("%-8s  " % "TIME", end="")
for stype, idx in sorted(stats.items(), key=lambda k_v: (k_v[1], k_v[0])):
    print(" %8s" % (stype + "/s"), end="")
print(" %8s" % "HIT%")

# output
i = 0
while (1):
    if count > 0:
        i += 1
        if i > count:
            exit()
    try:
        sleep(interval)
    except KeyboardInterrupt:
        pass
        exit()

    print("%-8s: " % strftime("%H:%M:%S"), end="")

    # print each statistic as a column
    for stype, idx in sorted(stats.items(), key=lambda k_v: (k_v[1], k_v[0])):
        try:
            val = b["stats"][c_int(idx)].value / interval
            print(" %8d" % val, end="")
        except:
            print(" %8d" % 0, end="")

    # print hit ratio percentage
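    # HIT% = 100 * (REFS - MISS) / REFS for this interval; "-" is printed
    # while REFS is still zero (the division by zero lands in the except)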
    try:
        ref = b["stats"][c_int(stats["REFS"])].value
        miss = b["stats"][c_int(stats["MISS"])].value
        hit = ref - miss
        pct = float(100) * hit / ref
        print(" %8.2f" % pct)
    except:
        print(" %7s%%" % "-")

    b["stats"].clear()