1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
|
package memlimit
import (
"bufio"
"errors"
"fmt"
"io"
"math"
"os"
"path/filepath"
"slices"
"strconv"
"strings"
)
// Sentinel errors describing why no cgroup information could be obtained.

// ErrNoCgroup is returned when the process is not in cgroup.
var ErrNoCgroup = errors.New("process is not in cgroup")

// ErrCgroupsNotSupported is returned when the system does not support cgroups.
var ErrCgroupsNotSupported = errors.New("cgroups is not supported on this system")
// fromCgroup reads the memory limit imposed by the cgroup of the current process.
//
// /proc/self/mountinfo is parsed and handed to versionDetector, whose two results
// report, in order, whether cgroup v1 and cgroup v2 are in use. If neither is
// reported, ErrNoCgroup is returned. Otherwise /proc/self/cgroup is parsed and:
//   - when v2 is reported, the v2 limit is looked up first and returned on success;
//   - when that lookup fails and v1 is not reported, the v2 error is returned;
//   - in every remaining case the v1 lookup decides the result.
func fromCgroup(versionDetector func(mis []mountInfo) (bool, bool)) (uint64, error) {
	mountInfoFile, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return 0, fmt.Errorf("failed to open /proc/self/mountinfo: %w", err)
	}
	defer mountInfoFile.Close()

	mounts, err := parseMountInfo(mountInfoFile)
	if err != nil {
		return 0, fmt.Errorf("failed to parse mountinfo: %w", err)
	}

	hasV1, hasV2 := versionDetector(mounts)
	if !hasV1 && !hasV2 {
		return 0, ErrNoCgroup
	}

	cgroupFile, err := os.Open("/proc/self/cgroup")
	if err != nil {
		return 0, fmt.Errorf("failed to open /proc/self/cgroup: %w", err)
	}
	defer cgroupFile.Close()

	hierarchies, err := parseCgroupFile(cgroupFile)
	if err != nil {
		return 0, fmt.Errorf("failed to parse cgroup file: %w", err)
	}

	// Without v2 there is nothing to try first.
	if !hasV2 {
		return getMemoryLimitV1(hierarchies, mounts)
	}

	limit, v2Err := getMemoryLimitV2(hierarchies, mounts)
	if v2Err == nil {
		return limit, nil
	}
	// v2 failed: only fall back to v1 when v1 was detected as well.
	if !hasV1 {
		return 0, v2Err
	}
	return getMemoryLimitV1(hierarchies, mounts)
}
// detectCgroupVersion inspects the parsed mountinfo entries and reports which
// cgroup versions are mounted. The first result is true when at least one entry
// has filesystem type "cgroup" (v1), the second when at least one entry has
// filesystem type "cgroup2" (v2).
func detectCgroupVersion(mis []mountInfo) (bool, bool) {
	mounted := func(fsType string) bool {
		return slices.ContainsFunc(mis, func(mi mountInfo) bool {
			return mi.FilesystemType == fsType
		})
	}
	return mounted("cgroup"), mounted("cgroup2")
}
// getMemoryLimitV2 retrieves the memory limit from the cgroup v2 controller.
//
// chs are the parsed /proc/self/cgroup entries and mis the parsed mountinfo
// entries. The cgroup path is taken from the first entry with hierarchy ID "0"
// and an empty controller list (the unified v2 hierarchy); the mount root and
// mount point come from the first "cgroup2" mount. An error is returned when
// either is missing or when the path cannot be resolved; otherwise the result
// is whatever reading <resolved path>/memory.max yields.
func getMemoryLimitV2(chs []cgroupHierarchy, mis []mountInfo) (uint64, error) {
	// locate the unified hierarchy entry of the process.
	var unified *cgroupHierarchy
	for i := range chs {
		if chs[i].HierarchyID == "0" && chs[i].ControllerList == "" {
			unified = &chs[i]
			break
		}
	}
	if unified == nil {
		return 0, errors.New("cgroup v2 path not found")
	}

	// locate the cgroup2 filesystem mount.
	var mount *mountInfo
	for i := range mis {
		if mis[i].FilesystemType == "cgroup2" {
			mount = &mis[i]
			break
		}
	}
	if mount == nil {
		return 0, errors.New("cgroup v2 mountpoint not found")
	}

	// translate the cgroup path into a directory below the mount point.
	cgroupDir, err := resolveCgroupPath(mount.MountPoint, mount.Root, unified.CgroupPath)
	if err != nil {
		return 0, err
	}

	return readMemoryLimitV2FromPath(filepath.Join(cgroupDir, "memory.max"))
}
// readMemoryLimitV2FromPath reads a cgroup v2 memory limit from path, which is
// expected to point at a memory.max file.
//
// ErrNoLimit is returned when the file does not exist or when its trimmed
// content is the literal "max". Any other read failure, or content that is not
// an unsigned base-10 integer, results in a wrapped error.
func readMemoryLimitV2FromPath(path string) (uint64, error) {
	raw, err := os.ReadFile(path)
	switch {
	case errors.Is(err, os.ErrNotExist):
		return 0, ErrNoLimit
	case err != nil:
		return 0, fmt.Errorf("failed to read memory.max: %w", err)
	}

	value := strings.TrimSpace(string(raw))
	if value == "max" {
		return 0, ErrNoLimit
	}

	parsed, parseErr := strconv.ParseUint(value, 10, 64)
	if parseErr != nil {
		return 0, fmt.Errorf("failed to parse memory.max value: %w", parseErr)
	}
	return parsed, nil
}
// getMemoryLimitV1 retrieves the memory limit from the cgroup v1 memory controller.
//
// chs are the parsed /proc/self/cgroup entries and mis the parsed mountinfo
// entries. The cgroup path is taken from the first entry whose comma-separated
// controller list contains "memory"; the mount root and mount point come from
// the first "cgroup" mount whose comma-separated super options contain
// "memory". An error is returned when either is missing or when the path cannot
// be resolved; otherwise the resolved cgroup directory is read for its limit.
func getMemoryLimitV1(chs []cgroupHierarchy, mis []mountInfo) (uint64, error) {
	listsMemory := func(commaSeparated string) bool {
		return slices.Contains(strings.Split(commaSeparated, ","), "memory")
	}

	// locate the hierarchy the memory controller is bound to.
	var hierarchy *cgroupHierarchy
	for i := range chs {
		if listsMemory(chs[i].ControllerList) {
			hierarchy = &chs[i]
			break
		}
	}
	if hierarchy == nil {
		return 0, errors.New("cgroup v1 path for memory controller not found")
	}

	// locate the cgroup (v1) mount that carries the memory controller.
	var mount *mountInfo
	for i := range mis {
		if mis[i].FilesystemType == "cgroup" && listsMemory(mis[i].SuperOptions) {
			mount = &mis[i]
			break
		}
	}
	if mount == nil {
		return 0, errors.New("cgroup v1 mountpoint for memory controller not found")
	}

	// translate the cgroup path into a directory below the mount point.
	cgroupDir, err := resolveCgroupPath(mount.MountPoint, mount.Root, hierarchy.CgroupPath)
	if err != nil {
		return 0, err
	}

	// the limit is derived from memory.stat and memory.limit_in_bytes in that directory.
	return readMemoryLimitV1FromPath(cgroupDir)
}
// getCgroupV1NoLimit returns the value that cgroup v1 uses to represent "no limit":
// the largest multiple of the system page size that does not exceed math.MaxInt64.
func getCgroupV1NoLimit() uint64 {
	const maxInt64 uint64 = math.MaxInt64
	pageSize := uint64(os.Getpagesize())
	// dropping the remainder rounds max int64 down to a page boundary.
	return maxInt64 - maxInt64%pageSize
}
// readMemoryLimitV1FromPath reads the memory limit for cgroup v1 from the given path.
// This function expects the path to be the cgroup directory.
//
// Two sources are consulted: hierarchical_memory_limit from <cgroupPath>/memory.stat
// and the content of <cgroupPath>/memory.limit_in_bytes. A source whose file does not
// exist, or whose value is 0, carries no information and is replaced by math.MaxUint64
// so that it can never win the minimum. The smaller of the two values is the limit.
//
// ErrNoLimit is returned when that limit is at or above the cgroup v1 "unlimited"
// value (getCgroupV1NoLimit). Read failures other than "file does not exist" and
// unparsable memory.limit_in_bytes content are returned as wrapped errors.
func readMemoryLimitV1FromPath(cgroupPath string) (uint64, error) {
	// hierarchical_memory_limit, with the max value as a fallback when unavailable.
	hml, err := readHierarchicalMemoryLimit(filepath.Join(cgroupPath, "memory.stat"))
	if err != nil && !errors.Is(err, os.ErrNotExist) {
		return 0, fmt.Errorf("failed to read hierarchical_memory_limit: %w", err)
	}
	if err != nil || hml == 0 {
		hml = math.MaxUint64
	}

	// memory.limit_in_bytes, with the same fallback. The value is only parsed when
	// the file could actually be read: parsing the empty content of a missing file
	// would otherwise fail even though a missing file is tolerated.
	lib := uint64(math.MaxUint64)
	b, err := os.ReadFile(filepath.Join(cgroupPath, "memory.limit_in_bytes"))
	switch {
	case err == nil:
		lib, err = strconv.ParseUint(strings.TrimSpace(string(b)), 10, 64)
		if err != nil {
			return 0, fmt.Errorf("failed to parse memory.limit_in_bytes value: %w", err)
		}
		if lib == 0 {
			// neutralise this source only; the hierarchical limit must stay in play.
			lib = math.MaxUint64
		}
	case !errors.Is(err, os.ErrNotExist):
		return 0, fmt.Errorf("failed to read memory.limit_in_bytes: %w", err)
	}

	// use the minimum value between hierarchical_memory_limit and memory.limit_in_bytes.
	// if the limit is the maximum value, then it is considered as no limit.
	limit := min(hml, lib)
	if limit >= getCgroupV1NoLimit() {
		return 0, ErrNoLimit
	}
	return limit, nil
}
// readHierarchicalMemoryLimit extracts hierarchical_memory_limit from the
// memory.stat file at path.
//
// The file is scanned line by line; every line must contain at least one space
// separating a name from a value, otherwise an error is returned. The first
// line named hierarchical_memory_limit decides the result: it must hold exactly
// one value, which is parsed as an unsigned base-10 integer. If no such line
// exists, 0 and a nil error are returned. Failure to open the file is returned
// unwrapped.
func readHierarchicalMemoryLimit(path string) (uint64, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	const key = "hierarchical_memory_limit"

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		entry := sc.Text()

		name, value, found := strings.Cut(entry, " ")
		if !found {
			return 0, fmt.Errorf("failed to parse memory.stat %q: not enough fields", entry)
		}
		if name != key {
			continue
		}
		// a further space means more than one value follows the name.
		if strings.Contains(value, " ") {
			return 0, fmt.Errorf("failed to parse memory.stat %q: too many fields for hierarchical_memory_limit", entry)
		}
		return strconv.ParseUint(value, 10, 64)
	}
	if scanErr := sc.Err(); scanErr != nil {
		return 0, scanErr
	}
	return 0, nil
}
// mountInfo keeps the four fields of a /proc/[pid]/mountinfo line that this
// file reads: the mount root (4), the mount point (5), the filesystem type (9)
// and the super options (11).
//
// Format reference:
// https://www.man7.org/linux/man-pages/man5/proc_pid_mountinfo.5.html
//
//	731 771 0:59 /sysrq-trigger /proc/sysrq-trigger ro,nosuid,nodev,noexec,relatime - proc proc rw
//
//	36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
//	(1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
//
// (1) mount ID: a unique ID for the mount (may be reused after umount(2)).
// (2) parent ID: the ID of the parent mount (or of self for the root of this mount namespace's mount tree).
// (3) major:minor: the value of st_dev for files on this filesystem (see stat(2)).
// (4) root: the pathname of the directory in the filesystem which forms the root of this mount.
// (5) mount point: the pathname of the mount point relative to the process's root directory.
// (6) mount options: per-mount options (see mount(2)).
// (7) optional fields: zero or more fields of the form "tag[:value]"; see below.
// (8) separator: the end of the optional fields is marked by a single hyphen.
// (9) filesystem type: the filesystem type in the form "type[.subtype]".
// (10) mount source: filesystem-specific information or "none".
// (11) super options: per-superblock options (see mount(2)).
type mountInfo struct {
// Root is field (4), the directory in the filesystem that forms the root of the mount.
Root string
// MountPoint is field (5), the mount point relative to the process's root directory.
MountPoint string
// FilesystemType is field (9), e.g. "cgroup" or "cgroup2".
FilesystemType string
// SuperOptions is field (11), the comma-separated per-superblock options.
SuperOptions string
}
// parseMountInfoLine parses a single line of a mountinfo file.
//
// The line is divided at the first " - " separator. The part before it must
// hold at least six space-separated fields, of which the fourth (root) and
// fifth (mount point) are kept; the part after it must hold at least three
// fields, of which the first (filesystem type) and everything from the third
// on (super options) are kept. An error is returned for an empty line, a
// missing separator, or too few fields on either side.
func parseMountInfoLine(line string) (mountInfo, error) {
	if len(line) == 0 {
		return mountInfo{}, errors.New("empty line")
	}

	front, tail, ok := strings.Cut(line, " - ")
	if !ok {
		return mountInfo{}, errors.New("invalid separator")
	}

	// only the first six fields are mandatory; optional fields, if any, stay
	// attached to the last element and are never looked at.
	before := strings.SplitN(front, " ", 6)
	if len(before) < 6 {
		return mountInfo{}, fmt.Errorf("not enough fields before separator: %v", before)
	}

	after := strings.SplitN(tail, " ", 3)
	if len(after) < 3 {
		return mountInfo{}, fmt.Errorf("not enough fields after separator: %v", after)
	}

	var mi mountInfo
	mi.Root = before[3]
	mi.MountPoint = before[4]
	mi.FilesystemType = after[0]
	mi.SuperOptions = after[2]
	return mi, nil
}
// parseMountInfo reads mountinfo content from r line by line and returns one
// mountInfo per line, in input order. The first line that fails to parse
// aborts the whole operation with an error quoting that line; a read error
// reported by the scanner is returned as is.
func parseMountInfo(r io.Reader) ([]mountInfo, error) {
	var entries []mountInfo

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		text := scanner.Text()
		entry, parseErr := parseMountInfoLine(text)
		if parseErr != nil {
			return nil, fmt.Errorf("failed to parse mountinfo file %q: %w", text, parseErr)
		}
		entries = append(entries, entry)
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return nil, scanErr
	}

	return entries, nil
}
// cgroupHierarchy holds the three colon-separated fields of one
// /proc/[pid]/cgroup line.
//
// Format reference:
// https://www.man7.org/linux/man-pages/man7/cgroups.7.html
//
//	5:cpuacct,cpu,cpuset:/daemons
//	(1)       (2)           (3)
//
// (1) hierarchy ID:
//
//	For cgroups version 1 hierarchies, this field
//	contains a unique hierarchy ID number that can be
//	matched to a hierarchy ID in /proc/cgroups. For the
//	cgroups version 2 hierarchy, this field contains the
//	value 0.
//
// (2) controller list:
//
//	For cgroups version 1 hierarchies, this field
//	contains a comma-separated list of the controllers
//	bound to the hierarchy. For the cgroups version 2
//	hierarchy, this field is empty.
//
// (3) cgroup path:
//
//	This field contains the pathname of the control group
//	in the hierarchy to which the process belongs. This
//	pathname is relative to the mount point of the
//	hierarchy.
type cgroupHierarchy struct {
// HierarchyID is field (1); "0" identifies the cgroup v2 hierarchy.
HierarchyID string
// ControllerList is field (2), comma-separated; empty for the cgroup v2 hierarchy.
ControllerList string
// CgroupPath is field (3), the cgroup of the process within the hierarchy.
CgroupPath string
}
// parseCgroupHierarchyLine parses a single "hierarchy-ID:controller-list:cgroup-path"
// line from the cgroup file.
//
// The line is split at the first two colons only, so a cgroup path that itself
// contains ':' (for example ".../kubepods-pod1.slice:cri-containerd:<id>") is
// kept intact instead of being rejected. An error is returned for an empty
// line or a line with fewer than three fields.
func parseCgroupHierarchyLine(line string) (cgroupHierarchy, error) {
	if line == "" {
		return cgroupHierarchy{}, errors.New("empty line")
	}

	// the third element receives the remainder of the line, colons included.
	fields := strings.SplitN(line, ":", 3)
	if len(fields) < 3 {
		return cgroupHierarchy{}, fmt.Errorf("not enough fields: %v", fields)
	}

	return cgroupHierarchy{
		HierarchyID:    fields[0],
		ControllerList: fields[1],
		CgroupPath:     fields[2],
	}, nil
}
// parseCgroupFile reads cgroup file content from r line by line and returns
// one cgroupHierarchy per line, in input order. The first line that fails to
// parse aborts the whole operation with an error quoting that line; a read
// error reported by the scanner is returned as is.
func parseCgroupFile(r io.Reader) ([]cgroupHierarchy, error) {
	var hierarchies []cgroupHierarchy

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		text := scanner.Text()
		hierarchy, parseErr := parseCgroupHierarchyLine(text)
		if parseErr != nil {
			return nil, fmt.Errorf("failed to parse cgroup file %q: %w", text, parseErr)
		}
		hierarchies = append(hierarchies, hierarchy)
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return nil, scanErr
	}

	return hierarchies, nil
}
// resolveCgroupPath resolves the actual cgroup path from the mountpoint, root, and cgroupRelPath.
//
// cgroupRelPath is made relative to root (the root of the cgroup mount) and the
// result is appended to mountpoint. When cgroupRelPath equals root, mountpoint
// itself is returned. An error is returned when the relative path cannot be
// computed or when cgroupRelPath lies outside root.
func resolveCgroupPath(mountpoint, root, cgroupRelPath string) (string, error) {
	rel, err := filepath.Rel(root, cgroupRelPath)
	if err != nil {
		return "", err
	}

	// if the relative path is ".", then the cgroupRelPath is the root itself.
	if rel == "." {
		return mountpoint, nil
	}

	// the path is outside the root only when its first element is exactly "..";
	// a plain prefix test would also reject in-root names such as "..data".
	if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return "", fmt.Errorf("invalid cgroup path: %s is not under root %s", cgroupRelPath, root)
	}

	return filepath.Join(mountpoint, rel), nil
}
|