File: devicefilter.go

package info (click to toggle)
golang-github-opencontainers-cgroups 0.0.4-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 844 kB
  • sloc: makefile: 2
file content (207 lines) | stat: -rw-r--r-- 6,105 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
// Implements creation of eBPF device filter program.
//
// Based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
//
// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano)
// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397
package devices

import (
	"errors"
	"fmt"
	"math"
	"strconv"

	"github.com/cilium/ebpf/asm"
	devices "github.com/opencontainers/cgroups/devices/config"
	"golang.org/x/sys/unix"
)

const (
	// license string format is same as kernel MODULE_LICENSE macro
	license = "Apache"
)

// deviceFilter returns eBPF device filter program and its license string.
func deviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
	// Generate the minimum ruleset for the device rules we are given. While we
	// don't care about minimum transitions in cgroupv2, using the emulator
	// gives us a guarantee that the behaviour of devices filtering is the same
	// as cgroupv1, including security hardenings to avoid misconfiguration
	// (such as punching holes in wildcard rules).
	emu := new(emulator)
	for _, rule := range rules {
		if err := emu.Apply(*rule); err != nil {
			return nil, "", err
		}
	}
	cleanRules, err := emu.Rules()
	if err != nil {
		return nil, "", err
	}

	p := &program{
		defaultAllow: emu.IsBlacklist(),
	}
	p.init()

	for idx, rule := range cleanRules {
		if rule.Type == devices.WildcardDevice {
			// We can safely skip over wildcard entries because there should
			// only be one (at most) at the very start to instruct cgroupv1 to
			// go into allow-list mode. However we do double-check this here.
			if idx != 0 || rule.Allow != emu.IsBlacklist() {
				return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
			}
			continue
		}
		if rule.Allow == p.defaultAllow {
			// There should be no rules which have an action equal to the
			// default action, the emulator removes those.
			return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
		}
		if err := p.appendRule(rule); err != nil {
			return nil, "", err
		}
	}
	return p.finalize(), license, nil
}

type program struct {
	insts        asm.Instructions
	defaultAllow bool
	blockID      int
}

func (p *program) init() {
	// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423
	/*
		u32 access_type
		u32 major
		u32 minor
	*/
	// R2 <- type (lower 16 bit of u32 access_type at R1[0])
	p.insts = append(p.insts,
		asm.LoadMem(asm.R2, asm.R1, 0, asm.Word),
		asm.And.Imm32(asm.R2, 0xFFFF))

	// R3 <- access (upper 16 bit of u32 access_type at R1[0])
	p.insts = append(p.insts,
		asm.LoadMem(asm.R3, asm.R1, 0, asm.Word),
		// RSh: bitwise shift right
		asm.RSh.Imm32(asm.R3, 16))

	// R4 <- major (u32 major at R1[4])
	p.insts = append(p.insts,
		asm.LoadMem(asm.R4, asm.R1, 4, asm.Word))

	// R5 <- minor (u32 minor at R1[8])
	p.insts = append(p.insts,
		asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
}

// appendRule rule converts an OCI rule to the relevant eBPF block and adds it
// to the in-progress filter program. In order to operate properly, it must be
// called with a "clean" rule list (generated by devices.Emulator.Rules() --
// with any "a" rules removed).
func (p *program) appendRule(rule *devices.Rule) error {
	if p.blockID < 0 {
		return errors.New("the program is finalized")
	}

	var bpfType int32
	switch rule.Type {
	case devices.CharDevice:
		bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
	case devices.BlockDevice:
		bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
	default:
		// We do not permit 'a', nor any other types we don't know about.
		return fmt.Errorf("invalid type %q", string(rule.Type))
	}
	if rule.Major > math.MaxUint32 {
		return fmt.Errorf("invalid major %d", rule.Major)
	}
	if rule.Minor > math.MaxUint32 {
		return fmt.Errorf("invalid minor %d", rule.Major)
	}
	hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
	hasMinor := rule.Minor >= 0
	bpfAccess := int32(0)
	for _, r := range rule.Permissions {
		switch r {
		case 'r':
			bpfAccess |= unix.BPF_DEVCG_ACC_READ
		case 'w':
			bpfAccess |= unix.BPF_DEVCG_ACC_WRITE
		case 'm':
			bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
		default:
			return fmt.Errorf("unknown device access %v", r)
		}
	}
	// If the access is rwm, skip the check.
	hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)

	var (
		blockSym         = "block-" + strconv.Itoa(p.blockID)
		nextBlockSym     = "block-" + strconv.Itoa(p.blockID+1)
		prevBlockLastIdx = len(p.insts) - 1
	)
	p.insts = append(p.insts,
		// if (R2 != bpfType) goto next
		asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
	)
	if hasAccess {
		p.insts = append(p.insts,
			// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
			asm.Mov.Reg32(asm.R1, asm.R3),
			asm.And.Imm32(asm.R1, bpfAccess),
			asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym),
		)
	}
	if hasMajor {
		p.insts = append(p.insts,
			// if (R4 != major) goto next
			asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym),
		)
	}
	if hasMinor {
		p.insts = append(p.insts,
			// if (R5 != minor) goto next
			asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym),
		)
	}
	p.insts = append(p.insts, acceptBlock(rule.Allow)...)
	// set blockSym to the first instruction we added in this iteration
	p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].WithSymbol(blockSym)
	p.blockID++
	return nil
}

func (p *program) finalize() asm.Instructions {
	var v int32
	if p.defaultAllow {
		v = 1
	}
	blockSym := "block-" + strconv.Itoa(p.blockID)
	p.insts = append(p.insts,
		// R0 <- v
		asm.Mov.Imm32(asm.R0, v).WithSymbol(blockSym),
		asm.Return(),
	)
	p.blockID = -1
	return p.insts
}

func acceptBlock(accept bool) asm.Instructions {
	var v int32
	if accept {
		v = 1
	}
	return []asm.Instruction{
		// R0 <- v
		asm.Mov.Imm32(asm.R0, v),
		asm.Return(),
	}
}