1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423
|
// Copyright (c) 2021-2022, Sylabs Inc. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.
package cgroups
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
lccgroups "github.com/opencontainers/runc/libcontainer/cgroups"
lcmanager "github.com/opencontainers/runc/libcontainer/cgroups/manager"
lcconfigs "github.com/opencontainers/runc/libcontainer/configs"
lcspecconv "github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sylabs/singularity/v4/internal/pkg/util/env"
"github.com/sylabs/singularity/v4/pkg/sylog"
)
// ErrUnitialized is returned by Manager methods when the Manager has no cgroup
// name set, or has no underlying runc/libcontainer cgroups manager.
var ErrUnitialized = errors.New("cgroups manager is not initialized")
// Manager provides functions to modify, freeze, thaw, and destroy a cgroup.
// Singularity's cgroups.Manager is a wrapper around runc/libcontainer/cgroups.
// The manager supports v1 cgroups, and v2 cgroups with a unified hierarchy.
// Resource specifications are handled in specs.LinuxResources format and
// translated to runc/libcontainer/cgroups format internally.
type Manager struct {
// group is the name (path) of the managed cgroup. Methods treat an empty
// value as an uninitialized Manager.
group string
// systemd records whether the cgroup is managed via systemd rather than
// directly through cgroupfs.
systemd bool
// cgroup is the underlying runc/libcontainer/cgroups manager. Methods treat
// a nil value as an uninitialized Manager.
cgroup lccgroups.Manager
}
// GetCgroupRootPath reports the root path under which cgroups are mounted for
// the managed cgroup.
//
// It returns ErrUnitialized if the Manager has no group name or no underlying
// manager. Under cgroups v2 (unified mode) the fixed unifiedMountPoint is
// returned. Under cgroups v1 the root is derived from the path of the
// 'devices' controller, and an error is returned if that path is not
// available.
func (m *Manager) GetCgroupRootPath() (rootPath string, err error) {
	if m.cgroup == nil || m.group == "" {
		return "", ErrUnitialized
	}

	// v2 has one fixed mount point for the root cgroup.
	if lccgroups.IsCgroup2UnifiedMode() {
		return unifiedMountPoint, nil
	}

	// v1: the 'devices' controller path is used as the reference. The devices
	// subsystem is needed for our OCI engine and its presence is enforced in
	// runc/libcontainer/cgroups/fs initialization without 'skipDevices', so a
	// container is never explicitly placed in a cgroup lacking a 'devices'
	// path.
	devicesDir, found := m.cgroup.GetPaths()["devices"]
	if !found {
		return "", fmt.Errorf("could not find devices controller path")
	}

	// Everything before the first "devices" is the root, e.g.
	// /sys/fs/cgroup/devices/singularity/196219 -> /sys/fs/cgroup
	root, _, hasController := strings.Cut(devicesDir, "devices")
	if !hasController {
		return "", fmt.Errorf("could not find devices controller path")
	}
	return filepath.Clean(root), nil
}
// GetCgroupRelPath reports the path of the managed cgroup relative to the
// cgroups mount point.
//
// It returns ErrUnitialized if the Manager has no group name or no underlying
// manager. Under cgroups v2 (unified mode) the unifiedMountPoint prefix is
// trimmed from the manager's path. Under cgroups v1 the relative path is
// derived from the path of the 'devices' controller, and an error is returned
// if that path is not available.
func (m *Manager) GetCgroupRelPath() (relPath string, err error) {
	if m.cgroup == nil || m.group == "" {
		return "", ErrUnitialized
	}

	// v2 has one fixed mount point for the root cgroup.
	if lccgroups.IsCgroup2UnifiedMode() {
		return strings.TrimPrefix(m.cgroup.Path(""), unifiedMountPoint), nil
	}

	// v1: the 'devices' controller path is used as the reference. The devices
	// subsystem is needed for our OCI engine and its presence is enforced in
	// runc/libcontainer/cgroups/fs initialization without 'skipDevices', so a
	// container is never explicitly placed in a cgroup lacking a 'devices'
	// path.
	devicesDir, found := m.cgroup.GetPaths()["devices"]
	if !found {
		return "", fmt.Errorf("could not find devices controller path")
	}

	// Everything after the first "devices" is the relative path, e.g.
	// /sys/fs/cgroup/devices/singularity/196219 -> /singularity/196219
	_, rel, hasController := strings.Cut(devicesDir, "devices")
	if !hasController {
		return "", fmt.Errorf("could not find devices controller path")
	}
	return filepath.Clean(rel), nil
}
// GetStats wraps the GetStats method of the underlying runc/libcontainer
// cgroups manager.
//
// On failure a non-nil, empty Stats value is returned together with the
// error, so existing callers that dereference the result keep working. The
// error is ErrUnitialized when the Manager has no group name or no underlying
// manager; otherwise it wraps the error reported by the underlying manager.
func (m *Manager) GetStats() (*lccgroups.Stats, error) {
	// Guard against calling through a nil manager, as the other methods do.
	if m.group == "" || m.cgroup == nil {
		return &lccgroups.Stats{}, ErrUnitialized
	}

	stats, err := m.cgroup.GetStats()
	if err != nil {
		// Wrap with %w so the cause stays readable and can be inspected with
		// errors.Is / errors.As.
		return &lccgroups.Stats{}, fmt.Errorf("could not get stats from cgroups manager: %w", err)
	}
	return stats, nil
}
// UpdateFromSpec updates the existing managed cgroup using configuration from
// an OCI LinuxResources spec struct.
//
// It returns ErrUnitialized if the Manager has no group name or no underlying
// manager, and an error if resources is nil. The resources are converted to a
// runc/libcontainer cgroup configuration and applied with the underlying
// manager's Set method; conversion and Set failures are returned wrapped.
func (m *Manager) UpdateFromSpec(resources *specs.LinuxResources) (err error) {
	if m.group == "" || m.cgroup == nil {
		return ErrUnitialized
	}
	// resources is dereferenced below; reject nil here rather than panic.
	if resources == nil {
		return fmt.Errorf("non-nil cgroup LinuxResources definition is required")
	}

	spec := &specs.Spec{
		Linux: &specs.Linux{
			CgroupsPath: m.group,
			Resources:   resources,
		},
	}

	opts := &lcspecconv.CreateOpts{
		CgroupName:       m.group,
		UseSystemdCgroup: false,
		RootlessCgroups:  os.Getuid() != 0,
		Spec:             spec,
	}

	lcConfig, err := lcspecconv.CreateCgroupConfig(opts, nil)
	if err != nil {
		return fmt.Errorf("could not create cgroup config: %w", err)
	}

	// runc/libcontainer/cgroups defaults to a deny-all policy, while
	// singularity has always allowed access to devices by default. If no device
	// rules are provided in the spec, then skip setting them so the deny-all is
	// not applied when we update the cgroup.
	if len(resources.Devices) == 0 {
		lcConfig.SkipDevices = true
	}

	if err := m.cgroup.Set(lcConfig.Resources); err != nil {
		return fmt.Errorf("while setting cgroup limits: %w", err)
	}
	return nil
}
// UpdateFromFile loads a resources configuration from the toml file at path
// with LoadResources and applies it to the existing managed cgroup through
// UpdateFromSpec. A load failure is returned wrapped, naming the file.
func (m *Manager) UpdateFromFile(path string) error {
	resources, loadErr := LoadResources(path)
	if loadErr != nil {
		return fmt.Errorf("while loading cgroups file %s: %w", path, loadErr)
	}

	return m.UpdateFromSpec(&resources)
}
// AddProc places the process identified by pid into the managed cgroup.
//
// It returns ErrUnitialized if the Manager has no group name or no underlying
// manager, and an error if pid is zero.
//
// Disable context check as it raises a warning through lcmanager.New, which is
// in a dependency we cannot modify to pass a context.
func (m *Manager) AddProc(pid int) (err error) {
	switch {
	case m.group == "" || m.cgroup == nil:
		return ErrUnitialized
	case pid == 0:
		return fmt.Errorf("cannot add a zero pid to cgroup")
	case !m.systemd:
		// Managing cgroupfs directly: the existing manager can add the process.
		return m.cgroup.Apply(pid)
	}

	// The systemd manager won't put another process in the cgroup, so a
	// cgroupfs manager for the same relative path is used for this operation.
	cgroupPath, err := m.GetCgroupRelPath()
	if err != nil {
		return err
	}

	fsMgr, err := lcmanager.New(&lcconfigs.Cgroup{
		Path:      cgroupPath,
		Resources: &lcconfigs.Resources{},
		Systemd:   false,
	})
	if err != nil {
		return fmt.Errorf("while creating cgroupfs manager: %w", err)
	}

	return fsMgr.Apply(pid)
}
// Freeze requests the Frozen state for the managed cgroup from the underlying
// manager. It returns ErrUnitialized if the Manager has no group name or no
// underlying manager.
func (m *Manager) Freeze() (err error) {
	if initialized := m.group != "" && m.cgroup != nil; !initialized {
		return ErrUnitialized
	}

	err = m.cgroup.Freeze(lcconfigs.Frozen)
	return err
}
// Thaw requests the Thawed state for the managed cgroup from the underlying
// manager. It returns ErrUnitialized if the Manager has no group name or no
// underlying manager.
func (m *Manager) Thaw() (err error) {
	if initialized := m.group != "" && m.cgroup != nil; !initialized {
		return ErrUnitialized
	}

	err = m.cgroup.Freeze(lcconfigs.Thawed)
	return err
}
// Destroy deletes the managed cgroup through the underlying manager. It
// returns ErrUnitialized if the Manager has no group name or no underlying
// manager.
func (m *Manager) Destroy() (err error) {
	if initialized := m.group != "" && m.cgroup != nil; !initialized {
		return ErrUnitialized
	}

	err = m.cgroup.Destroy()
	return err
}
// checkRootless decides whether rootless cgroups must be used for group, and
// validates that the request can be satisfied.
//
// When running as root (uid 0) it always reports rootless == false; with
// systemd enabled the group must begin with 'system.slice:'. When not running
// as root it reports rootless == true only if cgroups v2 is in use (hybrid or
// unified mode), systemd is enabled, XDG_RUNTIME_DIR and
// DBUS_SESSION_BUS_ADDRESS are both set, and the group begins with
// 'user.slice:'. Otherwise an error describing the unmet requirement is
// returned.
func checkRootless(group string, systemd bool) (rootless bool, err error) {
	if os.Getuid() == 0 {
		if systemd && !strings.HasPrefix(group, "system.slice:") {
			return false, fmt.Errorf("systemd cgroups require a cgroups path beginning with 'system.slice:'")
		}
		return false, nil
	}

	// Non-root: every requirement must hold. They are checked in order and
	// the first unmet one is reported.
	switch {
	case !(cgroups.IsCgroup2HybridMode() || cgroups.IsCgroup2UnifiedMode()):
		return false, fmt.Errorf("rootless cgroups requires cgroups v2")
	case !systemd:
		return false, fmt.Errorf("rootless cgroups require 'systemd cgroups' to be enabled in singularity.conf")
	case os.Getenv("XDG_RUNTIME_DIR") == "" || os.Getenv("DBUS_SESSION_BUS_ADDRESS") == "":
		return false, fmt.Errorf("rootless cgroups require a D-Bus session - check that XDG_RUNTIME_DIR and DBUS_SESSION_BUS_ADDRESS are set")
	case !strings.HasPrefix(group, "user.slice:"):
		return false, fmt.Errorf("rootless cgroups require a cgroups path beginning with 'user.slice:'")
	}

	return true, nil
}
// newManager creates a new Manager, with the associated resources and cgroup.
// The Manager is ready to manage the cgroup but does not apply limits etc.
//
// resources must be non-nil and group must be non-empty. checkRootless decides
// whether rootless cgroups are used, and its error is returned unchanged if
// the requested combination is not supported. If resources has no device
// rules, an allow-all ("rwm") rule is stored into resources.Devices, so the
// caller's struct is modified.
//
// Disable context check as it raises a warning through lcmanager.New, which is
// in a dependency we cannot modify to pass a context.
func newManager(resources *specs.LinuxResources, group string, systemd bool) (manager *Manager, err error) {
	if resources == nil {
		return nil, fmt.Errorf("non-nil cgroup LinuxResources definition is required")
	}
	if group == "" {
		return nil, fmt.Errorf("a cgroup name/path is required")
	}

	rootless, err := checkRootless(group, systemd)
	if err != nil {
		return nil, err
	}

	// Rootless manager code invokes systemctl, which it expects to be on PATH.
	// Must set default PATH as starter sets up a very stripped down environment.
	if rootless {
		sylog.Debugf("Using rootless cgroups")
		oldPath := os.Getenv("PATH")
		if err := os.Setenv("PATH", env.DefaultPath); err != nil {
			return nil, fmt.Errorf("could not set default PATH for cgroups manager to locate systemctl: %w", err)
		}
		// Best-effort restore of the previous PATH when this function returns.
		defer os.Setenv("PATH", oldPath)

		if len(resources.Devices) > 0 {
			sylog.Warningf("Device limits will not be applied with rootless cgroups")
		}
	}

	spec := &specs.Spec{
		Linux: &specs.Linux{
			CgroupsPath: group,
			Resources:   resources,
		},
	}

	opts := &lcspecconv.CreateOpts{
		CgroupName:       group,
		UseSystemdCgroup: systemd,
		RootlessCgroups:  rootless,
		Spec:             spec,
	}

	lcConfig, err := lcspecconv.CreateCgroupConfig(opts, nil)
	if err != nil {
		return nil, fmt.Errorf("could not create cgroup config: %w", err)
	}

	// runc/libcontainer/cgroups defaults to a deny-all policy, while
	// singularity has always allowed access to devices by default.
	//
	// NOTE(review): this rule is added after lcConfig has been created, so it
	// presumably only takes effect when the same resources are later passed to
	// UpdateFromSpec - confirm this ordering is intended.
	if len(resources.Devices) == 0 {
		resources.Devices = []specs.LinuxDeviceCgroup{
			{
				Allow:  true,
				Access: "rwm",
			},
		}
	}

	cgroup, err := lcmanager.New(lcConfig)
	if err != nil {
		return nil, fmt.Errorf("while creating cgroup manager: %w", err)
	}

	mgr := Manager{
		group:   group,
		systemd: systemd,
		cgroup:  cgroup,
	}
	return &mgr, nil
}
// NewManagerWithSpec creates a Manager, adds pid to the cgroup, and applies
// the configuration in spec.
//
// pid must be non-zero. If a group name is supplied, it will be used by the
// manager. If group is empty, the default returned by DefaultPathForPid for
// the given systemd setting and pid is used instead. Any error from creating
// the manager, adding the pid, or applying the spec is returned unchanged.
func NewManagerWithSpec(spec *specs.LinuxResources, pid int, group string, systemd bool) (manager *Manager, err error) {
	if pid == 0 {
		return nil, fmt.Errorf("a pid is required to create a new cgroup")
	}

	cgroupName := group
	if cgroupName == "" {
		cgroupName = DefaultPathForPid(systemd, pid)
	}
	sylog.Debugf("Creating cgroups manager for %s", cgroupName)

	// Create the manager; no limits are applied yet.
	manager, err = newManager(spec, cgroupName, systemd)
	if err != nil {
		return nil, err
	}

	// Add pid to the cgroup first, then apply the resource configuration.
	if err = manager.cgroup.Apply(pid); err != nil {
		return nil, err
	}
	if err = manager.UpdateFromSpec(spec); err != nil {
		return nil, err
	}

	return manager, nil
}
// NewManagerWithJSON creates a Manager from the JSON resources configuration
// in jsonSpec, adding pid to the cgroup and applying the configuration.
//
// jsonSpec is decoded with UnmarshalJSONResources; a decoding failure is
// returned wrapped. The remaining work, including the handling of an empty
// group, is delegated to NewManagerWithSpec.
func NewManagerWithJSON(jsonSpec string, pid int, group string, systemd bool) (manager *Manager, err error) {
	resources, decodeErr := UnmarshalJSONResources(jsonSpec)
	if decodeErr != nil {
		return nil, fmt.Errorf("while loading cgroups spec: %w", decodeErr)
	}

	return NewManagerWithSpec(resources, pid, group, systemd)
}
// NewManagerWithFile creates a Manager from the resources configuration file
// at specPath, adding pid to the cgroup and applying the configuration.
//
// The file is read with LoadResources; a load failure is returned wrapped. The
// remaining work, including the handling of an empty group, is delegated to
// NewManagerWithSpec.
func NewManagerWithFile(specPath string, pid int, group string, systemd bool) (manager *Manager, err error) {
	resources, loadErr := LoadResources(specPath)
	if loadErr != nil {
		return nil, fmt.Errorf("while loading cgroups spec: %w", loadErr)
	}

	return NewManagerWithSpec(&resources, pid, group, systemd)
}
// GetManagerForGroup returns a Manager for the provided cgroup name/path.
// It can only return a cgroupfs manager, as we aren't wiring back up to systemd
// through dbus etc.
//
// An error is returned if group is empty, or if the underlying manager cannot
// be created.
//
// Disable context check as it raises a warning through lcmanager.New, which is
// in a dependency we cannot modify to pass a context.
func GetManagerForGroup(group string) (manager *Manager, err error) {
	if group == "" {
		return nil, fmt.Errorf("cannot load cgroup - no name/path specified")
	}

	// Build an empty runc/libcontainer/configs resource spec directly. Calling
	// newManager() with an empty LinuxResources spec would also work, but this
	// saves the specconv processing.
	fsMgr, err := lcmanager.New(&lcconfigs.Cgroup{
		Path:      group,
		Resources: &lcconfigs.Resources{},
		Systemd:   false,
	})
	if err != nil {
		return nil, fmt.Errorf("while creating cgroup manager: %w", err)
	}

	return &Manager{
		group:   group,
		systemd: false,
		cgroup:  fsMgr,
	}, nil
}
// GetManagerForPid returns a Manager for the cgroup that pid is a member of.
// The cgroup path is resolved with pidToPath and the Manager is then obtained
// from GetManagerForGroup, so it can only be a cgroupfs manager, as we aren't
// wiring back up to systemd through dbus etc.
func GetManagerForPid(pid int) (manager *Manager, err error) {
	cgroupPath, lookupErr := pidToPath(pid)
	if lookupErr != nil {
		return nil, lookupErr
	}

	return GetManagerForGroup(cgroupPath)
}
|