File: rc93-0004-Fix-cgroup2-mount-for-rootless-case.patch

package info (click to toggle)
runc 1.0.0~rc93%2Bds1-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,172 kB
  • sloc: sh: 1,679; ansic: 1,039; makefile: 139
file content (176 lines) | stat: -rw-r--r-- 5,954 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Tue, 23 Feb 2021 18:27:42 -0800
Subject: [PATCH 4/5] Fix cgroup2 mount for rootless case

In the rootless case, mounting cgroup2 is not possible (see [1] for more
details), so since commit 9c81440fb5a7 runc bind-mounts the whole
/sys/fs/cgroup into the container.

The problem is that, if cgroupns is enabled, /sys/fs/cgroup inside the
container is supposed to show the files of the container's own cgroup,
not those of the root cgroup.

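The fix is to pass the container's cgroup path through to the init
config and, when the cgroup2 mount fails, cgroupns is enabled, and the
path is non-empty, to bind-mount that path instead of the whole
/sys/fs/cgroup. If the bind mount itself fails with ENOENT and rootless
cgroups are in use, the error is ignored.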

Of course, this requires the /sys/fs/cgroup mount to be present in the
spec, so change "runc spec --rootless" to keep that mount instead of
dropping it.

Before:

	$ ./runc run aaa
	# find /sys/fs/cgroup/ -type d
	/sys/fs/cgroup
	/sys/fs/cgroup/user.slice
	/sys/fs/cgroup/user.slice/user-1000.slice
	/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service
	...
	# ls -l /sys/fs/cgroup/cgroup.controllers
	-r--r--r--    1 nobody   nogroup          0 Feb 24 02:22 /sys/fs/cgroup/cgroup.controllers
	# wc -w /sys/fs/cgroup/cgroup.procs
	142 /sys/fs/cgroup/cgroup.procs
	# cat /sys/fs/cgroup/memory.current
	cat: can't open '/sys/fs/cgroup/memory.current': No such file or directory

After:

	# find /sys/fs/cgroup/ -type d
	/sys/fs/cgroup/
	# ls -l /sys/fs/cgroup/cgroup.controllers
	-r--r--r--    1 root     root             0 Feb 24 02:43 /sys/fs/cgroup/cgroup.controllers
	# wc -w /sys/fs/cgroup/cgroup.procs
	2 /sys/fs/cgroup/cgroup.procs
	# cat /sys/fs/cgroup/memory.current
	577536

[1] https://github.com/opencontainers/runc/issues/2158

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
---
 libcontainer/container_linux.go  |  3 +++
 libcontainer/init_linux.go       |  1 +
 libcontainer/rootfs_linux.go     | 28 +++++++++++++++++++++-------
 libcontainer/specconv/example.go | 18 +++++++++---------
 4 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index b6100aa..1cbc734 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -610,6 +610,9 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
 	if len(process.Rlimits) > 0 {
 		cfg.Rlimits = process.Rlimits
 	}
+	if cgroups.IsCgroup2UnifiedMode() {
+		cfg.Cgroup2Path = c.cgroupManager.Path("")
+	}
 
 	return cfg
 }
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go
index c57af0e..6817970 100644
--- a/libcontainer/init_linux.go
+++ b/libcontainer/init_linux.go
@@ -70,6 +70,7 @@ type initConfig struct {
 	RootlessEUID     bool                  `json:"rootless_euid,omitempty"`
 	RootlessCgroups  bool                  `json:"rootless_cgroups,omitempty"`
 	SpecState        *specs.State          `json:"spec_state,omitempty"`
+	Cgroup2Path      string                `json:"cgroup2_path,omitempty"`
 }
 
 type initer interface {
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index 0f0495b..5d2d74c 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -31,9 +31,11 @@ import (
 const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
 
 type mountConfig struct {
-	root     string
-	label    string
-	cgroupns bool
+	root            string
+	label           string
+	cgroup2Path     string
+	rootlessCgroups bool
+	cgroupns        bool
 }
 
 // needsSetupDev returns true if /dev needs to be set up.
@@ -56,9 +58,11 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
 	}
 
 	mountConfig := &mountConfig{
-		root:     config.Rootfs,
-		label:    config.MountLabel,
-		cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
+		root:            config.Rootfs,
+		label:           config.MountLabel,
+		cgroup2Path:     iConfig.Cgroup2Path,
+		rootlessCgroups: iConfig.RootlessCgroups,
+		cgroupns:        config.Namespaces.Contains(configs.NEWCGROUP),
 	}
 	setupDev := needsSetupDev(config)
 	for _, m := range config.Mounts {
@@ -307,7 +311,17 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
 		// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
 		if err == unix.EPERM || err == unix.EBUSY {
 			src := fs2.UnifiedMountpoint
-			return unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
+			if c.cgroupns && c.cgroup2Path != "" {
+				// Emulate cgroupns by bind-mounting
+				// the container cgroup path rather than
+				// the whole /sys/fs/cgroup.
+				src = c.cgroup2Path
+			}
+			err = unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
+			if err == unix.ENOENT && c.rootlessCgroups {
+				err = nil
+			}
+			return err
 		}
 		return err
 	}
diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go
index 8a201bc..56bab3b 100644
--- a/libcontainer/specconv/example.go
+++ b/libcontainer/specconv/example.go
@@ -2,6 +2,7 @@ package specconv
 
 import (
 	"os"
+	"path/filepath"
 	"strings"
 
 	"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -200,8 +201,14 @@ func ToRootless(spec *specs.Spec) {
 	// Fix up mounts.
 	var mounts []specs.Mount
 	for _, mount := range spec.Mounts {
-		// Ignore all mounts that are under /sys.
-		if strings.HasPrefix(mount.Destination, "/sys") {
+		// Replace the /sys mount with an rbind.
+		if filepath.Clean(mount.Destination) == "/sys" {
+			mounts = append(mounts, specs.Mount{
+				Source:      "/sys",
+				Destination: "/sys",
+				Type:        "none",
+				Options:     []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
+			})
 			continue
 		}
 
@@ -216,13 +223,6 @@ func ToRootless(spec *specs.Spec) {
 		mount.Options = options
 		mounts = append(mounts, mount)
 	}
-	// Add the sysfs mount as an rbind.
-	mounts = append(mounts, specs.Mount{
-		Source:      "/sys",
-		Destination: "/sys",
-		Type:        "none",
-		Options:     []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
-	})
 	spec.Mounts = mounts
 
 	// Remove cgroup settings.