File: sysinfo_linux.go

package info (click to toggle)
docker.io 28.5.2+dfsg1-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 69,048 kB
  • sloc: sh: 5,867; makefile: 863; ansic: 184; python: 162; asm: 159
file content (419 lines) | stat: -rw-r--r-- 12,401 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
package sysinfo

import (
	"context"
	"fmt"
	"os"
	"path"
	"strconv"
	"strings"
	"sync"

	"github.com/containerd/cgroups/v3"
	"github.com/containerd/cgroups/v3/cgroup1"
	"github.com/containerd/containerd/v2/pkg/seccomp"
	"github.com/containerd/log"
	"github.com/moby/sys/mountinfo"
)

// Package-level cache for readCgroupMountinfo: readMountinfoOnce ensures the
// mount table is read a single time, and cgroupMountinfo / readMountinfoErr
// hold the result (mounts and error) of that one read.
var (
	readMountinfoOnce sync.Once
	readMountinfoErr  error
	cgroupMountinfo   []*mountinfo.Info
)

// readCgroupMountinfo lists the cgroup v1 mounts of the current process,
// i.e. the mountinfo entries whose filesystem type is "cgroup".
//
// Reading mountinfo is relatively expensive, so the lookup is performed only
// on the first call; every later call returns the same cached mounts and
// error. This assumes cgroup mounts do not change while the process runs.
func readCgroupMountinfo() ([]*mountinfo.Info, error) {
	readMountinfoOnce.Do(func() {
		onlyCgroupV1 := mountinfo.FSTypeFilter("cgroup")
		cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts(onlyCgroupV1)
	})
	return cgroupMountinfo, readMountinfoErr
}

// findCgroupV1Mountpoints maps cgroup v1 subsystem names to the mountpoint of
// the hierarchy they are mounted on.
//
// The set of subsystems comes from parsing /proc/self/cgroup with
// cgroup1.ParseCgroupFile. A cgroup v1 mount is matched to a subsystem when
// the subsystem's name appears among the mount's VFS options; only the first
// matching mount is recorded for each subsystem. Keys of the returned map have
// any "name=" prefix stripped (e.g. "name=systemd" is stored as "systemd").
//
// An error is returned when the cgroup mounts cannot be read or when
// /proc/self/cgroup cannot be parsed; the latter wraps the underlying error.
func findCgroupV1Mountpoints() (map[string]string, error) {
	mounts, err := readCgroupMountinfo()
	if err != nil {
		return nil, err
	}

	allSubsystems, err := cgroup1.ParseCgroupFile("/proc/self/cgroup")
	if err != nil {
		// %w (rather than %v) keeps the cause available to errors.Is/As
		// while producing the same message text.
		return nil, fmt.Errorf("Failed to parse cgroup information: %w", err)
	}

	// allMap records, per known subsystem, whether a mountpoint has already
	// been found for it.
	allMap := make(map[string]bool, len(allSubsystems))
	for s := range allSubsystems {
		allMap[s] = false
	}

	mps := make(map[string]string, len(allMap))
	for _, mi := range mounts {
		for _, opt := range strings.Split(mi.VFSOptions, ",") {
			seen, known := allMap[opt]
			if known && !seen {
				allMap[opt] = true
				mps[strings.TrimPrefix(opt, "name=")] = mi.Mountpoint
			}
		}
		// Stop scanning once every known subsystem has a mountpoint.
		if len(mps) >= len(allMap) {
			break
		}
	}
	return mps, nil
}

// infoCollector is a function that fills in part of a SysInfo. newV1 builds a
// list of collectors and runs them in order against the same SysInfo.
type infoCollector func(info *SysInfo)

// WithCgroup2GroupPath specifies the cgroup v2 group path to inspect availability
// of the controllers.
//
// WithCgroup2GroupPath is expected to be used for rootless mode with systemd driver.
//
// The path is normalized with path.Clean before it is stored. An empty g is
// ignored and leaves the SysInfo's group path untouched.
//
// e.g. g = "/user.slice/user-1000.slice/user@1000.service"
func WithCgroup2GroupPath(g string) Opt {
	return func(o *SysInfo) {
		// path.Clean never returns an empty string (an empty input is
		// cleaned to "."), so emptiness has to be checked on the raw input;
		// otherwise an empty g would set the group path to ".".
		if g == "" {
			return
		}
		o.cg2GroupPath = path.Clean(g)
	}
}

// New builds a SysInfo describing which features the kernel supports, as
// detected through the filesystem.
//
// When cgroups.Mode reports the unified (v2) hierarchy, detection is delegated
// to newV2 together with the supplied options; in every other mode newV1 is
// used, which takes no options.
func New(options ...Opt) *SysInfo {
	switch cgroups.Mode() {
	case cgroups.Unified:
		return newV2(options...)
	default:
		return newV1()
	}
}

// newV1 builds a SysInfo for hosts that are not on the unified cgroup
// hierarchy.
//
// The networking, AppArmor, seccomp and cgroup-namespace collectors always
// run. The per-controller cgroup collectors are added only when the cgroup v1
// mountpoints could be determined; otherwise the lookup error is logged as a
// warning and those collectors are skipped.
func newV1() *SysInfo {
	sysInfo := &SysInfo{}

	collectors := []infoCollector{
		applyNetworkingInfo,
		applyAppArmorInfo,
		applySeccompInfo,
		applyCgroupNsInfo,
	}

	var err error
	sysInfo.cgMounts, err = findCgroupV1Mountpoints()
	if err == nil {
		collectors = append(collectors,
			applyMemoryCgroupInfo,
			applyCPUCgroupInfo,
			applyBlkioCgroupInfo,
			applyCPUSetCgroupInfo,
			applyPIDSCgroupInfo,
			applyDevicesCgroupInfo,
		)
	} else {
		log.G(context.TODO()).Warn(err)
	}

	// Collectors run in list order, which also fixes the order of warnings.
	for _, collect := range collectors {
		collect(sysInfo)
	}
	return sysInfo
}

// applyMemoryCgroupInfo records which memory cgroup controller features are
// available. If no "memory" mountpoint is known, a single warning is added and
// nothing else is set. Otherwise each feature is detected by checking that its
// control file exists under the mountpoint.
func applyMemoryCgroupInfo(info *SysInfo) {
	memMount, found := info.cgMounts["memory"]
	if !found {
		info.Warnings = append(info.Warnings, "Your kernel does not support cgroup memory limit")
		return
	}
	info.MemoryLimit = true

	// Features that produce a warning when their control file is missing,
	// probed in this order.
	probes := []struct {
		supported *bool
		file      string
		warning   string
	}{
		{&info.SwapLimit, "memory.memsw.limit_in_bytes", "Your kernel does not support swap memory limit"},
		{&info.MemoryReservation, "memory.soft_limit_in_bytes", "Your kernel does not support memory reservation"},
		{&info.OomKillDisable, "memory.oom_control", "Your kernel does not support oom control"},
		{&info.MemorySwappiness, "memory.swappiness", "Your kernel does not support memory swappiness"},
	}
	for _, p := range probes {
		*p.supported = cgroupEnabled(memMount, p.file)
		if !*p.supported {
			info.Warnings = append(info.Warnings, p.warning)
		}
	}

	// Option is deprecated, but still accepted on API < v1.42 with cgroups v1,
	// so the field is set to allow feature detection. No warning is added
	// when it is missing.
	info.KernelMemory = cgroupEnabled(memMount, "memory.kmem.limit_in_bytes")

	// Option is deprecated in runc, but still accepted in our API, so the
	// field is set to allow feature detection. No warning is added when it is
	// missing, to keep the daemon logs a bit less noisy.
	info.KernelMemoryTCP = cgroupEnabled(memMount, "memory.kmem.tcp.limit_in_bytes")
}

// applyCPUCgroupInfo records which cpu cgroup controller features are
// available. If no "cpu" mountpoint is known, a warning is added and nothing
// else is set. Otherwise shares, CFS and realtime support are each detected by
// checking that the matching control file exists; a warning is added for every
// one that is missing.
func applyCPUCgroupInfo(info *SysInfo) {
	cpuMount, found := info.cgMounts["cpu"]
	if !found {
		info.Warnings = append(info.Warnings, "Unable to find cpu cgroup in mounts")
		return
	}

	// probe reports whether the control file exists under the cpu mountpoint
	// and records the given warning when it does not.
	probe := func(file, warning string) bool {
		if cgroupEnabled(cpuMount, file) {
			return true
		}
		info.Warnings = append(info.Warnings, warning)
		return false
	}

	info.CPUShares = probe("cpu.shares", "Your kernel does not support CPU shares")
	info.CPUCfs = probe("cpu.cfs_quota_us", "Your kernel does not support CPU CFS scheduler")
	info.CPURealtime = probe("cpu.rt_period_us", "Your kernel does not support CPU realtime scheduler")
}

// applyBlkioCgroupInfo records which blkio cgroup controller features are
// available. If no "blkio" mountpoint is known, a warning is added and nothing
// else is set. Otherwise each feature is detected by checking that its control
// file exists under the mountpoint, and a warning is added for every feature
// that is missing.
func applyBlkioCgroupInfo(info *SysInfo) {
	blkioMount, found := info.cgMounts["blkio"]
	if !found {
		info.Warnings = append(info.Warnings, "Unable to find blkio cgroup in mounts")
		return
	}

	// Probed in this order; the order also determines the warning order.
	probes := []struct {
		supported *bool
		file      string
		warning   string
	}{
		{&info.BlkioWeight, "blkio.weight", "Your kernel does not support cgroup blkio weight"},
		{&info.BlkioWeightDevice, "blkio.weight_device", "Your kernel does not support cgroup blkio weight_device"},
		{&info.BlkioReadBpsDevice, "blkio.throttle.read_bps_device", "Your kernel does not support cgroup blkio throttle.read_bps_device"},
		{&info.BlkioWriteBpsDevice, "blkio.throttle.write_bps_device", "Your kernel does not support cgroup blkio throttle.write_bps_device"},
		{&info.BlkioReadIOpsDevice, "blkio.throttle.read_iops_device", "Your kernel does not support cgroup blkio throttle.read_iops_device"},
		{&info.BlkioWriteIOpsDevice, "blkio.throttle.write_iops_device", "Your kernel does not support cgroup blkio throttle.write_iops_device"},
	}
	for _, p := range probes {
		*p.supported = cgroupEnabled(blkioMount, p.file)
		if !*p.supported {
			info.Warnings = append(info.Warnings, p.warning)
		}
	}
}

// applyCPUSetCgroupInfo adds the cpuset cgroup controller information to the info.
//
// If no "cpuset" mountpoint is known, a warning is added and nothing else is
// set. Otherwise Cpuset is enabled and the cpuset.cpus and cpuset.mems files
// under the mountpoint are read: their trimmed contents are stored in Cpus
// and Mems, and the parsed sets in CPUSets and MemSets.
//
// Processing stops at the first file that cannot be read (silently) or
// parsed (with a warning), leaving the remaining fields unset.
func applyCPUSetCgroupInfo(info *SysInfo) {
	mountPoint, ok := info.cgMounts["cpuset"]
	if !ok {
		info.Warnings = append(info.Warnings, "Unable to find cpuset cgroup in mounts")
		return
	}
	info.Cpuset = true

	cpus, err := os.ReadFile(path.Join(mountPoint, "cpuset.cpus"))
	if err != nil {
		// Best effort: an unreadable file is not reported as a warning.
		return
	}
	info.Cpus = strings.TrimSpace(string(cpus))
	cpuSets, err := parseUintList(info.Cpus, 0)
	if err != nil {
		info.Warnings = append(info.Warnings, "Unable to parse cpuset cpus: "+err.Error())
		return
	}
	info.CPUSets = cpuSets

	mems, err := os.ReadFile(path.Join(mountPoint, "cpuset.mems"))
	if err != nil {
		// Best effort: an unreadable file is not reported as a warning.
		return
	}
	info.Mems = strings.TrimSpace(string(mems))
	// Parse the memory node list itself; parsing info.Cpus here would make
	// MemSets a copy of the CPU set.
	memSets, err := parseUintList(info.Mems, 0)
	if err != nil {
		info.Warnings = append(info.Warnings, "Unable to parse cpuset mems: "+err.Error())
		return
	}
	info.MemSets = memSets
}

// applyPIDSCgroupInfo enables PidsLimit when a "pids" cgroup mountpoint is
// known, and adds a warning when it is not.
func applyPIDSCgroupInfo(info *SysInfo) {
	if _, mounted := info.cgMounts["pids"]; mounted {
		info.PidsLimit = true
		return
	}
	info.Warnings = append(info.Warnings, "Unable to find pids cgroup in mounts")
}

// applyDevicesCgroupInfo sets CgroupDevicesEnabled to whether a "devices"
// cgroup mountpoint is known. No warning is added when it is missing.
func applyDevicesCgroupInfo(info *SysInfo) {
	_, info.CgroupDevicesEnabled = info.cgMounts["devices"]
}

// applyNetworkingInfo marks IPv4 forwarding as disabled unless
// /proc/sys/net/ipv4/ip_forward reads as true via readProcBool.
func applyNetworkingInfo(info *SysInfo) {
	forwarding := readProcBool("/proc/sys/net/ipv4/ip_forward")
	info.IPv4ForwardingDisabled = !forwarding
}

// applyAppArmorInfo stores the result of apparmorSupported in info.AppArmor.
func applyAppArmorInfo(info *SysInfo) {
	supported := apparmorSupported()
	info.AppArmor = supported
}

// applyCgroupNsInfo stores the result of cgroupnsSupported in
// info.CgroupNamespaces.
func applyCgroupNsInfo(info *SysInfo) {
	supported := cgroupnsSupported()
	info.CgroupNamespaces = supported
}

// applySeccompInfo stores whether seccomp is enabled, as reported by
// seccomp.IsEnabled, in info.Seccomp.
func applySeccompInfo(info *SysInfo) {
	enabled := seccomp.IsEnabled()
	info.Seccomp = enabled
}

// apparmorSupported reports whether AppArmor is enabled: the directory
// /sys/kernel/security/apparmor must not be reported as missing, and its
// "profiles" file must be readable.
func apparmorSupported() bool {
	if _, statErr := os.Stat("/sys/kernel/security/apparmor"); os.IsNotExist(statErr) {
		return false
	}
	_, readErr := os.ReadFile("/sys/kernel/security/apparmor/profiles")
	return readErr == nil
}

// cgroupnsSupported reports whether cgroup namespaces are supported, which is
// taken to be the case unless /proc/self/ns/cgroup is reported as missing.
func cgroupnsSupported() bool {
	_, err := os.Stat("/proc/self/ns/cgroup")
	return !os.IsNotExist(err)
}

// cgroupEnabled reports whether the control file called name can be stat'ed
// inside the cgroup mounted at mountPoint. Any stat error, not only a missing
// file, counts as "not enabled".
func cgroupEnabled(mountPoint, name string) bool {
	controlFile := path.Join(mountPoint, name)
	if _, err := os.Stat(controlFile); err != nil {
		return false
	}
	return true
}

// readProcBool reads the file at path and reports whether its content, with
// surrounding whitespace trimmed, is exactly "1". A file that cannot be read
// yields false.
func readProcBool(path string) bool {
	raw, err := os.ReadFile(path)
	return err == nil && strings.TrimSpace(string(raw)) == "1"
}

// defaultMaxCPUs is the normal maximum number of CPUs on Linux. It is the
// starting value for the upper bound that isCpusetListAvailable passes to
// parseUintList.
const defaultMaxCPUs = 8192

// isCpusetListAvailable reports whether every value in the requested list
// (in the format accepted by parseUintList) is a key of available.
//
// It returns an error when requested cannot be parsed or contains a value
// above the allowed limit.
func isCpusetListAvailable(requested string, available map[int]struct{}) (bool, error) {
	// The limit starts at the normal maximum number of CPUs on Linux and is
	// raised when the available set actually contains a larger value.
	//
	// This limit was added in f8e876d7616469d07b8b049ecb48967eeb8fa7a5
	// to address CVE-2018-20699:
	//
	// Using a value such as `--cpuset-mems=1-9223372036854775807` would cause
	// dockerd to run out of memory allocating a map of the values in the
	// validation code. Set limits to the normal limit of the number of CPUs.
	//
	// More details in https://github.com/docker-archive/engine/pull/70#issuecomment-458458288
	limit := defaultMaxCPUs
	for id := range available {
		if id > limit {
			limit = id
		}
	}

	wanted, err := parseUintList(requested, limit)
	if err != nil {
		return false, err
	}

	for id := range wanted {
		_, present := available[id]
		if !present {
			return false, nil
		}
	}
	return true, nil
}

// parseUintList parses and validates the specified string as the value
// found in some cgroup file (e.g. `cpuset.cpus`, `cpuset.mems`), which could be
// one of the formats below. Note that duplicates are actually allowed in the
// input string. It returns a `map[int]struct{}` whose keys are the elements
// listed in `val`; an empty `val` yields an empty, non-nil map.
//
// Values larger than `maximum` cause an error if `maximum` is non-zero, in
// order to stop the map becoming excessively large. A `maximum` of zero
// disables that check. Malformed input (non-numeric elements, or a range
// whose upper bound is below its lower bound) yields an "invalid format"
// error.
//
// Supported formats:
//
//	7
//	1-6
//	0,3-4,7,8-10
//	0-0,0,1-7
//	03,1-3      <- this is gonna get parsed as [1,2,3]
//	3,2,1
//	0-2,3,1
func parseUintList(val string, maximum int) (map[int]struct{}, error) {
	if val == "" {
		return map[int]struct{}{}, nil
	}

	availableInts := make(map[int]struct{})
	errInvalidFormat := fmt.Errorf("invalid format: %s", val)

	// checkMaximum rejects v when a non-zero maximum is set and v exceeds it.
	checkMaximum := func(v int) error {
		if maximum != 0 && v > maximum {
			return fmt.Errorf("value out of range, maximum is %d", maximum)
		}
		return nil
	}

	for _, r := range strings.Split(val, ",") {
		minS, maxS, isRange := strings.Cut(r, "-")
		if !isRange {
			// Single value.
			v, err := strconv.Atoi(r)
			if err != nil {
				return nil, errInvalidFormat
			}
			if err := checkMaximum(v); err != nil {
				return nil, err
			}
			availableInts[v] = struct{}{}
			continue
		}

		// Inclusive range "min-max". A leading "-" leaves minS empty, so
		// negative numbers are rejected as invalid format.
		minAvailable, err := strconv.Atoi(minS)
		if err != nil {
			return nil, errInvalidFormat
		}
		maxAvailable, err := strconv.Atoi(maxS)
		if err != nil {
			return nil, errInvalidFormat
		}
		if maxAvailable < minAvailable {
			return nil, errInvalidFormat
		}
		// Checking the upper bound is enough: the lower bound cannot exceed it.
		if err := checkMaximum(maxAvailable); err != nil {
			return nil, err
		}
		for i := minAvailable; i <= maxAvailable; i++ {
			availableInts[i] = struct{}{}
		}
	}
	return availableInts, nil
}