1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
|
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Tue, 23 Feb 2021 18:27:42 -0800
Subject: [PATCH 4/5] Fix cgroup2 mount for rootless case
In the rootless case, mounting cgroup2 is not possible (see [1] for more
details), so since commit 9c81440fb5a7 runc bind-mounts the whole
/sys/fs/cgroup into the container.
The problem is that, if cgroupns is enabled, /sys/fs/cgroup inside the container
is supposed to show the files of the container's own cgroup, not those of the root cgroup.
The fix is to pass the container's cgroup path through to the init config and
bind-mount it instead if the cgroup2 mount failed, cgroupns is enabled, and the path is non-empty.
This requires the /sys/fs/cgroup mount to be present in the spec, so modify
runc spec --rootless to keep it.
Before:
$ ./runc run aaa
# find /sys/fs/cgroup/ -type d
/sys/fs/cgroup
/sys/fs/cgroup/user.slice
/sys/fs/cgroup/user.slice/user-1000.slice
/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service
...
# ls -l /sys/fs/cgroup/cgroup.controllers
-r--r--r-- 1 nobody nogroup 0 Feb 24 02:22 /sys/fs/cgroup/cgroup.controllers
# wc -w /sys/fs/cgroup/cgroup.procs
142 /sys/fs/cgroup/cgroup.procs
# cat /sys/fs/cgroup/memory.current
cat: can't open '/sys/fs/cgroup/memory.current': No such file or directory
After:
# find /sys/fs/cgroup/ -type d
/sys/fs/cgroup/
# ls -l /sys/fs/cgroup/cgroup.controllers
-r--r--r-- 1 root root 0 Feb 24 02:43 /sys/fs/cgroup/cgroup.controllers
# wc -w /sys/fs/cgroup/cgroup.procs
2 /sys/fs/cgroup/cgroup.procs
# cat /sys/fs/cgroup/memory.current
577536
[1] https://github.com/opencontainers/runc/issues/2158
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
---
libcontainer/container_linux.go | 3 +++
libcontainer/init_linux.go | 1 +
libcontainer/rootfs_linux.go | 28 +++++++++++++++++++++-------
libcontainer/specconv/example.go | 18 +++++++++---------
4 files changed, 34 insertions(+), 16 deletions(-)
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index b6100aa..1cbc734 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -610,6 +610,9 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
if len(process.Rlimits) > 0 {
cfg.Rlimits = process.Rlimits
}
+ if cgroups.IsCgroup2UnifiedMode() {
+ cfg.Cgroup2Path = c.cgroupManager.Path("")
+ }
return cfg
}
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go
index c57af0e..6817970 100644
--- a/libcontainer/init_linux.go
+++ b/libcontainer/init_linux.go
@@ -70,6 +70,7 @@ type initConfig struct {
RootlessEUID bool `json:"rootless_euid,omitempty"`
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
SpecState *specs.State `json:"spec_state,omitempty"`
+ Cgroup2Path string `json:"cgroup2_path,omitempty"`
}
type initer interface {
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index 0f0495b..5d2d74c 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -31,9 +31,11 @@ import (
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
type mountConfig struct {
- root string
- label string
- cgroupns bool
+ root string
+ label string
+ cgroup2Path string
+ rootlessCgroups bool
+ cgroupns bool
}
// needsSetupDev returns true if /dev needs to be set up.
@@ -56,9 +58,11 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
}
mountConfig := &mountConfig{
- root: config.Rootfs,
- label: config.MountLabel,
- cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
+ root: config.Rootfs,
+ label: config.MountLabel,
+ cgroup2Path: iConfig.Cgroup2Path,
+ rootlessCgroups: iConfig.RootlessCgroups,
+ cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
}
setupDev := needsSetupDev(config)
for _, m := range config.Mounts {
@@ -307,7 +311,17 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
if err == unix.EPERM || err == unix.EBUSY {
src := fs2.UnifiedMountpoint
- return unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
+ if c.cgroupns && c.cgroup2Path != "" {
+ // Emulate cgroupns by bind-mounting
+ // the container cgroup path rather than
+ // the whole /sys/fs/cgroup.
+ src = c.cgroup2Path
+ }
+ err = unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
+ if err == unix.ENOENT && c.rootlessCgroups {
+ err = nil
+ }
+ return err
}
return err
}
diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go
index 8a201bc..56bab3b 100644
--- a/libcontainer/specconv/example.go
+++ b/libcontainer/specconv/example.go
@@ -2,6 +2,7 @@ package specconv
import (
"os"
+ "path/filepath"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -200,8 +201,14 @@ func ToRootless(spec *specs.Spec) {
// Fix up mounts.
var mounts []specs.Mount
for _, mount := range spec.Mounts {
- // Ignore all mounts that are under /sys.
- if strings.HasPrefix(mount.Destination, "/sys") {
+ // Replace the /sys mount with an rbind.
+ if filepath.Clean(mount.Destination) == "/sys" {
+ mounts = append(mounts, specs.Mount{
+ Source: "/sys",
+ Destination: "/sys",
+ Type: "none",
+ Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
+ })
continue
}
@@ -216,13 +223,6 @@ func ToRootless(spec *specs.Spec) {
mount.Options = options
mounts = append(mounts, mount)
}
- // Add the sysfs mount as an rbind.
- mounts = append(mounts, specs.Mount{
- Source: "/sys",
- Destination: "/sys",
- Type: "none",
- Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
- })
spec.Mounts = mounts
// Remove cgroup settings.
|