File: kprobe.go

package info (click to toggle)
golang-github-cilium-ebpf 0.17.3%2Bds1-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 4,684 kB
  • sloc: ansic: 1,259; makefile: 127; python: 113; awk: 29; sh: 24
file content (369 lines) | stat: -rw-r--r-- 12,203 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
package link

import (
	"errors"
	"fmt"
	"os"
	"runtime"
	"strings"
	"unsafe"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/linux"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/tracefs"
	"github.com/cilium/ebpf/internal/unix"
)

// KprobeOptions defines additional parameters that will be used
// when loading Kprobes.
type KprobeOptions struct {
	// Arbitrary value that can be fetched from an eBPF program
	// via `bpf_get_attach_cookie()`.
	//
	// Needs kernel 5.15+.
	Cookie uint64
	// Offset of the kprobe relative to the traced symbol.
	// Can be used to insert kprobes at arbitrary offsets in kernel functions,
	// e.g. in places where functions have been inlined.
	Offset uint64
	// Increase the maximum number of concurrent invocations of a kretprobe.
	// Required when tracing some long running functions in the kernel.
	//
	// Deprecated: this setting forces the use of an outdated kernel API and is not portable
	// across kernel versions.
	RetprobeMaxActive int
	// Prefix used for the event name if the kprobe must be attached using tracefs.
	// The group name will be formatted as `<prefix>_<randomstr>`.
	// The default empty string is equivalent to "ebpf" as the prefix.
	TraceFSPrefix string
}

func (ko *KprobeOptions) cookie() uint64 {
	if ko == nil {
		return 0
	}
	return ko.Cookie
}

// Kprobe attaches the given eBPF program to a perf event that fires when the
// given kernel symbol starts executing. See /proc/kallsyms for available
// symbols. For example, printk():
//
//	kp, err := Kprobe("printk", prog, nil)
//
// Losing the reference to the resulting Link (kp) will close the Kprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
//
// If attaching to symbol fails, automatically retries with the running
// platform's syscall prefix (e.g. __x64_) to support attaching to syscalls
// in a portable fashion.
//
// On kernels 6.11 and later, setting a kprobe on a nonexistent symbol using
// tracefs incorrectly returns [unix.EINVAL] instead of [os.ErrNotExist].
//
// The returned Link may implement [PerfEvent].
func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
	k, err := kprobe(symbol, prog, opts, false)
	if err != nil {
		return nil, err
	}

	lnk, err := attachPerfEvent(k, prog, opts.cookie())
	if err != nil {
		k.Close()
		return nil, err
	}

	return lnk, nil
}

// Kretprobe attaches the given eBPF program to a perf event that fires right
// before the given kernel symbol exits, with the function stack left intact.
// See /proc/kallsyms for available symbols. For example, printk():
//
//	kp, err := Kretprobe("printk", prog, nil)
//
// Losing the reference to the resulting Link (kp) will close the Kretprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
//
// If attaching to symbol fails, automatically retries with the running
// platform's syscall prefix (e.g. __x64_) to support attaching to syscalls
// in a portable fashion.
//
// On kernels 5.10 and earlier, setting a kretprobe on a nonexistent symbol
// incorrectly returns [unix.EINVAL] instead of [os.ErrNotExist].
//
// The returned Link may implement [PerfEvent].
func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
	k, err := kprobe(symbol, prog, opts, true)
	if err != nil {
		return nil, err
	}

	lnk, err := attachPerfEvent(k, prog, opts.cookie())
	if err != nil {
		k.Close()
		return nil, err
	}

	return lnk, nil
}

// isValidKprobeSymbol implements the equivalent of a regex match
// against "^[a-zA-Z_][0-9a-zA-Z_.]*$".
func isValidKprobeSymbol(s string) bool {
	if len(s) < 1 {
		return false
	}

	for i, c := range []byte(s) {
		switch {
		case c >= 'a' && c <= 'z':
		case c >= 'A' && c <= 'Z':
		case c == '_':
		case i > 0 && c >= '0' && c <= '9':

		// Allow `.` in symbol name. GCC-compiled kernel may change symbol name
		// to have a `.isra.$n` suffix, like `udp_send_skb.isra.52`.
		// See: https://gcc.gnu.org/gcc-10/changes.html
		case i > 0 && c == '.':

		default:
			return false
		}
	}

	return true
}

// kprobe opens a perf event on the given symbol and attaches prog to it.
// If ret is true, create a kretprobe.
func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*perfEvent, error) {
	if symbol == "" {
		return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput)
	}
	if prog == nil {
		return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
	}
	if !isValidKprobeSymbol(symbol) {
		return nil, fmt.Errorf("symbol '%s' must be a valid symbol in /proc/kallsyms: %w", symbol, errInvalidInput)
	}
	if prog.Type() != ebpf.Kprobe {
		return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput)
	}

	args := tracefs.ProbeArgs{
		Type:   tracefs.Kprobe,
		Pid:    perfAllThreads,
		Symbol: symbol,
		Ret:    ret,
	}

	if opts != nil {
		args.RetprobeMaxActive = opts.RetprobeMaxActive
		args.Cookie = opts.Cookie
		args.Offset = opts.Offset
		args.Group = opts.TraceFSPrefix
	}

	// Use kprobe PMU if the kernel has it available.
	tp, err := pmuProbe(args)
	if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
		if prefix := linux.PlatformPrefix(); prefix != "" {
			args.Symbol = prefix + symbol
			tp, err = pmuProbe(args)
		}
	}
	if err == nil {
		return tp, nil
	}
	if !errors.Is(err, ErrNotSupported) {
		return nil, fmt.Errorf("creating perf_kprobe PMU (arch-specific fallback for %q): %w", symbol, err)
	}

	// Use tracefs if kprobe PMU is missing.
	args.Symbol = symbol
	tp, err = tracefsProbe(args)
	if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
		if prefix := linux.PlatformPrefix(); prefix != "" {
			args.Symbol = prefix + symbol
			tp, err = tracefsProbe(args)
		}
	}
	if err != nil {
		return nil, fmt.Errorf("creating tracefs event (arch-specific fallback for %q): %w", symbol, err)
	}

	return tp, nil
}

// pmuProbe opens a perf event based on a Performance Monitoring Unit.
//
// Requires at least a 4.17 kernel.
// e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU"
// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
//
// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU
func pmuProbe(args tracefs.ProbeArgs) (*perfEvent, error) {
	// Getting the PMU type will fail if the kernel doesn't support
	// the perf_[k,u]probe PMU.
	eventType, err := internal.ReadUint64FromFileOnce("%d\n", "/sys/bus/event_source/devices", args.Type.String(), "type")
	if errors.Is(err, os.ErrNotExist) {
		return nil, fmt.Errorf("%s: %w", args.Type, ErrNotSupported)
	}
	if err != nil {
		return nil, err
	}

	// Use tracefs if we want to set kretprobe's retprobeMaxActive.
	if args.RetprobeMaxActive != 0 {
		return nil, fmt.Errorf("pmu probe: non-zero retprobeMaxActive: %w", ErrNotSupported)
	}

	var config uint64
	if args.Ret {
		bit, err := internal.ReadUint64FromFileOnce("config:%d\n", "/sys/bus/event_source/devices", args.Type.String(), "/format/retprobe")
		if err != nil {
			return nil, err
		}
		config |= 1 << bit
	}

	var (
		attr  unix.PerfEventAttr
		sp    unsafe.Pointer
		token string
	)
	switch args.Type {
	case tracefs.Kprobe:
		// Create a pointer to a NUL-terminated string for the kernel.
		sp, err = unsafeStringPtr(args.Symbol)
		if err != nil {
			return nil, err
		}

		token = tracefs.KprobeToken(args)

		attr = unix.PerfEventAttr{
			// The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1,
			// since it added the config2 (Ext2) field. Use Ext2 as probe_offset.
			Size:   unix.PERF_ATTR_SIZE_VER1,
			Type:   uint32(eventType),   // PMU event type read from sysfs
			Ext1:   uint64(uintptr(sp)), // Kernel symbol to trace
			Ext2:   args.Offset,         // Kernel symbol offset
			Config: config,              // Retprobe flag
		}
	case tracefs.Uprobe:
		sp, err = unsafeStringPtr(args.Path)
		if err != nil {
			return nil, err
		}

		if args.RefCtrOffset != 0 {
			config |= args.RefCtrOffset << uprobeRefCtrOffsetShift
		}

		token = tracefs.UprobeToken(args)

		attr = unix.PerfEventAttr{
			// The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1,
			// since it added the config2 (Ext2) field. The Size field controls the
			// size of the internal buffer the kernel allocates for reading the
			// perf_event_attr argument from userspace.
			Size:   unix.PERF_ATTR_SIZE_VER1,
			Type:   uint32(eventType),   // PMU event type read from sysfs
			Ext1:   uint64(uintptr(sp)), // Uprobe path
			Ext2:   args.Offset,         // Uprobe offset
			Config: config,              // RefCtrOffset, Retprobe flag
		}
	}

	cpu := 0
	if args.Pid != perfAllThreads {
		cpu = -1
	}
	rawFd, err := unix.PerfEventOpen(&attr, args.Pid, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC)

	// On some old kernels, kprobe PMU doesn't allow `.` in symbol names and
	// return -EINVAL. Return ErrNotSupported to allow falling back to tracefs.
	// https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343
	if errors.Is(err, unix.EINVAL) && strings.Contains(args.Symbol, ".") {
		return nil, fmt.Errorf("token %s: older kernels don't accept dots: %w", token, ErrNotSupported)
	}
	// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
	// when trying to create a retprobe for a missing symbol.
	if errors.Is(err, os.ErrNotExist) {
		return nil, fmt.Errorf("token %s: not found: %w", token, err)
	}
	// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
	// to an invalid insn boundary. The exact conditions that trigger this error are
	// arch specific however.
	if errors.Is(err, unix.EILSEQ) {
		return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
	}
	// Since at least commit cb9a19fe4aa51, ENOTSUPP is returned
	// when attempting to set a uprobe on a trap instruction.
	if errors.Is(err, sys.ENOTSUPP) {
		return nil, fmt.Errorf("token %s: failed setting uprobe on offset %#x (possible trap insn): %w", token, args.Offset, err)
	}

	if err != nil {
		return nil, fmt.Errorf("token %s: opening perf event: %w", token, err)
	}

	// Ensure the string pointer is not collected before PerfEventOpen returns.
	runtime.KeepAlive(sp)

	fd, err := sys.NewFD(rawFd)
	if err != nil {
		return nil, err
	}

	// Kernel has perf_[k,u]probe PMU available, initialize perf event.
	return newPerfEvent(fd, nil), nil
}

// tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events.
// A new trace event group name is generated on every call to support creating
// multiple trace events for the same kernel or userspace symbol.
// Path and offset are only set in the case of uprobe(s) and are used to set
// the executable/library path on the filesystem and the offset where the probe is inserted.
// A perf event is then opened on the newly-created trace event and returned to the caller.
func tracefsProbe(args tracefs.ProbeArgs) (*perfEvent, error) {
	groupPrefix := "ebpf"
	if args.Group != "" {
		groupPrefix = args.Group
	}

	// Generate a random string for each trace event we attempt to create.
	// This value is used as the 'group' token in tracefs to allow creating
	// multiple kprobe trace events with the same name.
	group, err := tracefs.RandomGroup(groupPrefix)
	if err != nil {
		return nil, fmt.Errorf("randomizing group name: %w", err)
	}
	args.Group = group

	// Create the [k,u]probe trace event using tracefs.
	evt, err := tracefs.NewEvent(args)
	if err != nil {
		return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
	}

	// Kprobes are ephemeral tracepoints and share the same perf event type.
	fd, err := openTracepointPerfEvent(evt.ID(), args.Pid)
	if err != nil {
		// Make sure we clean up the created tracefs event when we return error.
		// If a livepatch handler is already active on the symbol, the write to
		// tracefs will succeed, a trace event will show up, but creating the
		// perf event will fail with EBUSY.
		_ = evt.Close()
		return nil, err
	}

	return newPerfEvent(fd, evt), nil
}