1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369
|
package link
import (
"errors"
"fmt"
"os"
"runtime"
"strings"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/linux"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/tracefs"
"github.com/cilium/ebpf/internal/unix"
)
// KprobeOptions carries the optional settings that are taken into account
// when a kprobe or kretprobe is created.
type KprobeOptions struct {
	// Cookie is an arbitrary value the eBPF program can read back
	// through `bpf_get_attach_cookie()`.
	//
	// Needs kernel 5.15+.
	Cookie uint64
	// Offset is the distance of the kprobe from the start of the traced symbol.
	// It allows placing kprobes at arbitrary positions inside kernel functions,
	// e.g. at spots where other functions have been inlined.
	Offset uint64
	// RetprobeMaxActive raises the maximum number of concurrent invocations
	// of a kretprobe. Required when tracing some long running functions in
	// the kernel.
	//
	// Deprecated: this setting forces the use of an outdated kernel API and is not portable
	// across kernel versions.
	RetprobeMaxActive int
	// TraceFSPrefix is the prefix of the event name used when the kprobe has
	// to be attached through tracefs. The group name is formatted as
	// `<prefix>_<randomstr>`. Leaving it empty is equivalent to using "ebpf".
	TraceFSPrefix string
}

// cookie returns the configured Cookie. It may be called on a nil receiver,
// in which case it reports 0.
func (ko *KprobeOptions) cookie() uint64 {
	if ko != nil {
		return ko.Cookie
	}
	return 0
}
// Kprobe hooks prog up to a perf event that triggers whenever the given
// kernel symbol begins executing. Available symbols are listed in
// /proc/kallsyms. For example, to trace printk():
//
//	kp, err := Kprobe("printk", prog, nil)
//
// Once the last reference to the returned Link (kp) is lost, the Kprobe is
// closed and prog stops being executed. The Link must be Closed during
// program shutdown to avoid leaking system resources.
//
// When attaching to symbol fails, the attempt is automatically repeated with
// the syscall prefix of the running platform (e.g. __x64_), which allows
// attaching to syscalls in a portable fashion.
//
// On kernels 6.11 and later, setting a kprobe on a nonexistent symbol using
// tracefs incorrectly returns [unix.EINVAL] instead of [os.ErrNotExist].
//
// The returned Link may implement [PerfEvent].
func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
	pe, err := kprobe(symbol, prog, opts, false)
	if err != nil {
		return nil, err
	}

	attached, err := attachPerfEvent(pe, prog, opts.cookie())
	if err == nil {
		return attached, nil
	}

	// Attaching failed, so the perf event is released before the error is reported.
	pe.Close()
	return nil, err
}
// Kretprobe hooks prog up to a perf event that triggers right before the
// given kernel symbol exits, while the function stack is still intact.
// Available symbols are listed in /proc/kallsyms. For example, to trace
// printk():
//
//	kp, err := Kretprobe("printk", prog, nil)
//
// Once the last reference to the returned Link (kp) is lost, the Kretprobe is
// closed and prog stops being executed. The Link must be Closed during
// program shutdown to avoid leaking system resources.
//
// When attaching to symbol fails, the attempt is automatically repeated with
// the syscall prefix of the running platform (e.g. __x64_), which allows
// attaching to syscalls in a portable fashion.
//
// On kernels 5.10 and earlier, setting a kretprobe on a nonexistent symbol
// incorrectly returns [unix.EINVAL] instead of [os.ErrNotExist].
//
// The returned Link may implement [PerfEvent].
func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
	pe, err := kprobe(symbol, prog, opts, true)
	if err != nil {
		return nil, err
	}

	attached, err := attachPerfEvent(pe, prog, opts.cookie())
	if err == nil {
		return attached, nil
	}

	// Attaching failed, so the perf event is released before the error is reported.
	pe.Close()
	return nil, err
}
// isValidKprobeSymbol reports whether s matches the pattern
// "^[a-zA-Z_][0-9a-zA-Z_.]*$" without resorting to a regular expression.
func isValidKprobeSymbol(s string) bool {
	if s == "" {
		return false
	}

	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]

		// Letters and underscores are accepted at any position.
		isLower := 'a' <= ch && ch <= 'z'
		isUpper := 'A' <= ch && ch <= 'Z'
		if isLower || isUpper || ch == '_' {
			continue
		}

		// Digits and dots may only follow the first character. Dots need to
		// be accepted because a GCC-compiled kernel may rename symbols to
		// carry a `.isra.$n` suffix, like `udp_send_skb.isra.52`.
		// See: https://gcc.gnu.org/gcc-10/changes.html
		isDigit := '0' <= ch && ch <= '9'
		if pos > 0 && (isDigit || ch == '.') {
			continue
		}

		return false
	}

	return true
}
// kprobe validates its input and opens a perf event for a kprobe on the given
// symbol, or for a kretprobe if ret is true. prog is only inspected here (it
// must be non-nil and of type Kprobe); attaching it to the returned perf event
// is left to the caller.
//
// The perf_kprobe PMU is tried first. tracefs is only used if the PMU attempt
// fails with ErrNotSupported. With either mechanism, a failure with
// os.ErrNotExist or unix.EINVAL triggers one more attempt with the platform
// prefix prepended to symbol, provided the platform has one.
func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*perfEvent, error) {
	// The cases are evaluated top to bottom, which decides which problem is
	// reported first and guarantees prog is non-nil before Type is called.
	switch {
	case symbol == "":
		return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput)
	case prog == nil:
		return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
	case !isValidKprobeSymbol(symbol):
		return nil, fmt.Errorf("symbol '%s' must be a valid symbol in /proc/kallsyms: %w", symbol, errInvalidInput)
	case prog.Type() != ebpf.Kprobe:
		return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput)
	}

	args := tracefs.ProbeArgs{
		Type:   tracefs.Kprobe,
		Pid:    perfAllThreads,
		Symbol: symbol,
		Ret:    ret,
	}
	if opts != nil {
		args.Cookie = opts.Cookie
		args.Offset = opts.Offset
		args.Group = opts.TraceFSPrefix
		args.RetprobeMaxActive = opts.RetprobeMaxActive
	}

	// open runs create against the plain symbol. If that fails because the
	// symbol is reported as missing or invalid, and the platform has a prefix,
	// create is run once more against the prefixed symbol and that second
	// result is returned instead.
	open := func(create func(tracefs.ProbeArgs) (*perfEvent, error)) (*perfEvent, error) {
		args.Symbol = symbol
		pe, err := create(args)
		if !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.EINVAL) {
			return pe, err
		}

		prefix := linux.PlatformPrefix()
		if prefix == "" {
			return pe, err
		}

		args.Symbol = prefix + symbol
		return create(args)
	}

	// Prefer the kprobe PMU when the kernel provides it.
	pe, err := open(pmuProbe)
	if err == nil {
		return pe, nil
	}
	if !errors.Is(err, ErrNotSupported) {
		return nil, fmt.Errorf("creating perf_kprobe PMU (arch-specific fallback for %q): %w", symbol, err)
	}

	// The kprobe PMU is unavailable, go through tracefs instead.
	pe, err = open(tracefsProbe)
	if err != nil {
		return nil, fmt.Errorf("creating tracefs event (arch-specific fallback for %q): %w", symbol, err)
	}

	return pe, nil
}
// pmuProbe creates a perf event backed by the perf_kprobe or perf_uprobe
// Performance Monitoring Unit, depending on args.Type.
//
// Requires at least a 4.17 kernel.
// e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU"
// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
//
// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU,
// if args.RetprobeMaxActive is non-zero, or if the kernel rejects a symbol
// containing a dot with EINVAL.
func pmuProbe(args tracefs.ProbeArgs) (*perfEvent, error) {
	const pmuDevices = "/sys/bus/event_source/devices"

	// Reading the PMU type fails on kernels without the perf_[k,u]probe PMU,
	// so a missing file is reported as a lack of support.
	pmuType, err := internal.ReadUint64FromFileOnce("%d\n", pmuDevices, args.Type.String(), "type")
	switch {
	case errors.Is(err, os.ErrNotExist):
		return nil, fmt.Errorf("%s: %w", args.Type, ErrNotSupported)
	case err != nil:
		return nil, err
	}

	// A kretprobe's retprobeMaxActive can only be set through tracefs.
	if args.RetprobeMaxActive != 0 {
		return nil, fmt.Errorf("pmu probe: non-zero retprobeMaxActive: %w", ErrNotSupported)
	}

	var config uint64
	if args.Ret {
		// The position of the retprobe flag within config is published in sysfs.
		retprobeBit, err := internal.ReadUint64FromFileOnce("config:%d\n", pmuDevices, args.Type.String(), "/format/retprobe")
		if err != nil {
			return nil, err
		}
		config |= 1 << retprobeBit
	}

	// perfAttr assembles the attribute passed to perf_event_open. Both probe
	// types use the same layout: Ext1 carries the address of a NUL-terminated
	// string (kernel symbol or uprobe path), Ext2 the offset and Config the
	// flags accumulated in config at the time of the call.
	//
	// The minimum size required for PMU probes is PERF_ATTR_SIZE_VER1, since
	// it added the config2 (Ext2) field. The Size field controls the size of
	// the internal buffer the kernel allocates for reading the
	// perf_event_attr argument from userspace.
	perfAttr := func(str unsafe.Pointer) unix.PerfEventAttr {
		return unix.PerfEventAttr{
			Size:   unix.PERF_ATTR_SIZE_VER1,
			Type:   uint32(pmuType),
			Ext1:   uint64(uintptr(str)),
			Ext2:   args.Offset,
			Config: config,
		}
	}

	var (
		attr   unix.PerfEventAttr
		strPtr unsafe.Pointer
		token  string
	)
	switch args.Type {
	case tracefs.Kprobe:
		// The kernel expects a pointer to a NUL-terminated symbol name.
		if strPtr, err = unsafeStringPtr(args.Symbol); err != nil {
			return nil, err
		}

		token = tracefs.KprobeToken(args)
		attr = perfAttr(strPtr)

	case tracefs.Uprobe:
		if strPtr, err = unsafeStringPtr(args.Path); err != nil {
			return nil, err
		}

		// The reference counter offset shares the config field with the
		// retprobe flag.
		if args.RefCtrOffset != 0 {
			config |= args.RefCtrOffset << uprobeRefCtrOffsetShift
		}

		token = tracefs.UprobeToken(args)
		attr = perfAttr(strPtr)
	}

	// CPU 0 is used when the probe covers all threads, -1 otherwise.
	cpu := -1
	if args.Pid == perfAllThreads {
		cpu = 0
	}

	rawFd, err := unix.PerfEventOpen(&attr, args.Pid, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC)
	switch {
	// On some old kernels, kprobe PMU doesn't allow `.` in symbol names and
	// return -EINVAL. Return ErrNotSupported to allow falling back to tracefs.
	// https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343
	case errors.Is(err, unix.EINVAL) && strings.Contains(args.Symbol, "."):
		return nil, fmt.Errorf("token %s: older kernels don't accept dots: %w", token, ErrNotSupported)

	// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
	// when trying to create a retprobe for a missing symbol.
	case errors.Is(err, os.ErrNotExist):
		return nil, fmt.Errorf("token %s: not found: %w", token, err)

	// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
	// to an invalid insn boundary. The exact conditions that trigger this error are
	// arch specific however.
	case errors.Is(err, unix.EILSEQ):
		return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)

	// Since at least commit cb9a19fe4aa51, ENOTSUPP is returned
	// when attempting to set a uprobe on a trap instruction.
	case errors.Is(err, sys.ENOTSUPP):
		return nil, fmt.Errorf("token %s: failed setting uprobe on offset %#x (possible trap insn): %w", token, args.Offset, err)

	case err != nil:
		return nil, fmt.Errorf("token %s: opening perf event: %w", token, err)
	}

	// Keep the string referenced until PerfEventOpen has returned, so it
	// cannot be collected while the kernel may still read it.
	runtime.KeepAlive(strPtr)

	fd, err := sys.NewFD(rawFd)
	if err != nil {
		return nil, err
	}

	// The perf_[k,u]probe PMU is available: wrap the descriptor in a perf event.
	return newPerfEvent(fd, nil), nil
}
// tracefsProbe registers a trace event by adding an entry to
// <tracefs>/[k,u]probe_events and opens a perf event on it.
//
// Every call generates a fresh trace event group name, so that several trace
// events can exist for the same kernel or userspace symbol. args.Group, or
// "ebpf" if it is empty, serves as the prefix of that name.
// Path and offset are only set in the case of uprobe(s), where they select the
// executable/library on the filesystem and the position at which the probe is
// inserted.
func tracefsProbe(args tracefs.ProbeArgs) (*perfEvent, error) {
	prefix := args.Group
	if prefix == "" {
		prefix = "ebpf"
	}

	// The random string becomes the 'group' token in tracefs, which is what
	// permits several kprobe trace events with the same name.
	randomGroup, err := tracefs.RandomGroup(prefix)
	if err != nil {
		return nil, fmt.Errorf("randomizing group name: %w", err)
	}
	args.Group = randomGroup

	// Register the [k,u]probe trace event with tracefs.
	event, err := tracefs.NewEvent(args)
	if err != nil {
		return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
	}

	// Kprobes are ephemeral tracepoints and share the same perf event type.
	fd, err := openTracepointPerfEvent(event.ID(), args.Pid)
	if err == nil {
		return newPerfEvent(fd, event), nil
	}

	// Remove the tracefs event again, since it is of no use without a perf
	// event. If a livepatch handler is already active on the symbol, the write
	// to tracefs will succeed, a trace event will show up, but creating the
	// perf event will fail with EBUSY. The result of Close is deliberately
	// dropped in favour of the original error.
	_ = event.Close()
	return nil, err
}
|