File: rlimit_linux.go

package info (click to toggle)
golang-github-cilium-ebpf 0.17.3%2Bds1-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 4,684 kB
  • sloc: ansic: 1,259; makefile: 127; python: 113; awk: 29; sh: 24
file content (122 lines) | stat: -rw-r--r-- 4,029 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package rlimit

import (
	"errors"
	"fmt"
	"sync"

	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/unix"
)

var (
	unsupportedMemcgAccounting = &internal.UnsupportedFeatureError{
		MinimumVersion: internal.Version{5, 11, 0},
		Name:           "memcg-based accounting for BPF memory",
	}
	haveMemcgAccounting error

	rlimitMu sync.Mutex
)

func init() {
	// We have to run this feature test at init, since it relies on changing
	// RLIMIT_MEMLOCK. Doing so is not safe in a concurrent program. Instead,
	// we rely on the initialization order guaranteed by the Go runtime to
	// execute the test in a safe environment:
	//
	//    the invocation of init functions happens in a single goroutine,
	//    sequentially, one package at a time.
	//
	// This is also the reason why RemoveMemlock is in its own package:
	// we only want to run the initializer if RemoveMemlock is called
	// from somewhere.
	haveMemcgAccounting = detectMemcgAccounting()
}

func detectMemcgAccounting() error {
	// Retrieve the original limit to prevent lowering Max, since
	// doing so is a permanent operation when running unprivileged.
	var oldLimit unix.Rlimit
	if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, nil, &oldLimit); err != nil {
		return fmt.Errorf("getting original memlock rlimit: %s", err)
	}

	// Drop the current limit to zero, maintaining the old Max value.
	// This is always permitted by the kernel for unprivileged users.
	// Retrieve a new copy of the old limit tuple to minimize the chances
	// of failing the restore operation below.
	zeroLimit := unix.Rlimit{Cur: 0, Max: oldLimit.Max}
	if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &zeroLimit, &oldLimit); err != nil {
		return fmt.Errorf("lowering memlock rlimit: %s", err)
	}

	attr := sys.MapCreateAttr{
		MapType:    2, /* Array */
		KeySize:    4,
		ValueSize:  4,
		MaxEntries: 1,
	}

	// Creating a map allocates shared (and locked) memory that counts against
	// the rlimit on pre-5.11 kernels, but against the memory cgroup budget on
	// kernels 5.11 and over. If this call succeeds with the process' memlock
	// rlimit set to 0, we can reasonably assume memcg accounting is supported.
	fd, mapErr := sys.MapCreate(&attr)

	// Restore old limits regardless of what happened.
	if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &oldLimit, nil); err != nil {
		return fmt.Errorf("restoring old memlock rlimit: %s", err)
	}

	// Map creation successful, memcg accounting supported.
	if mapErr == nil {
		fd.Close()
		return nil
	}

	// EPERM shows up when map creation would exceed the memory budget.
	if errors.Is(mapErr, unix.EPERM) {
		return unsupportedMemcgAccounting
	}

	// This shouldn't happen really.
	return fmt.Errorf("unexpected error detecting memory cgroup accounting: %s", mapErr)
}

// RemoveMemlock removes the limit on the amount of memory the current
// process can lock into RAM, if necessary.
//
// This is not required to load eBPF resources on kernel versions 5.11+
// due to the introduction of cgroup-based memory accounting. On such kernels
// the function is a no-op.
//
// Since the function may change global per-process limits it should be invoked
// at program start up, in main() or init().
//
// This function exists as a convenience and should only be used when
// permanently raising RLIMIT_MEMLOCK to infinite is appropriate. Consider
// invoking prlimit(2) directly with a more reasonable limit if desired.
//
// Requires CAP_SYS_RESOURCE on kernels < 5.11.
func RemoveMemlock() error {
	if haveMemcgAccounting == nil {
		return nil
	}

	if !errors.Is(haveMemcgAccounting, unsupportedMemcgAccounting) {
		return haveMemcgAccounting
	}

	rlimitMu.Lock()
	defer rlimitMu.Unlock()

	// pid 0 affects the current process. Requires CAP_SYS_RESOURCE.
	newLimit := unix.Rlimit{Cur: unix.RLIM_INFINITY, Max: unix.RLIM_INFINITY}
	if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &newLimit, nil); err != nil {
		return fmt.Errorf("failed to set memlock rlimit: %w", err)
	}

	return nil
}