1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
|
//go:build linux && cgo && !agent
package sys
import (
"errors"
"os"
"os/user"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/lxc/incus/v6/internal/linux"
"github.com/lxc/incus/v6/internal/server/cgroup"
"github.com/lxc/incus/v6/internal/server/db/cluster"
localUtil "github.com/lxc/incus/v6/internal/server/util"
internalUtil "github.com/lxc/incus/v6/internal/util"
"github.com/lxc/incus/v6/internal/version"
"github.com/lxc/incus/v6/shared/idmap"
"github.com/lxc/incus/v6/shared/logger"
"github.com/lxc/incus/v6/shared/osarch"
"github.com/lxc/incus/v6/shared/util"
)
// InotifyTargetInfo records the inotify information associated with a given
// inotify target.
type InotifyTargetInfo struct {
	// Mask is the inotify mask recorded for the target
	// (presumably the IN_* event bits; confirm against the watcher code).
	Mask uint32
	// Wd is presumably the inotify watch descriptor of the target; confirm
	// against the code that fills it in.
	Wd int
	// Path is the path recorded for the target.
	Path string
}
// InotifyInfo records the inotify information associated with a given
// inotify instance.
type InotifyInfo struct {
	// Fd is the descriptor of the inotify instance. DefaultOS initialises it
	// to -1.
	Fd int
	// The embedded lock presumably guards Targets; confirm against the code
	// that reads and writes the map.
	sync.RWMutex
	// Targets holds the per-target records, keyed by string. DefaultOS
	// initialises it to an empty map.
	Targets map[string]*InotifyTargetInfo
}
// OS is a high-level facade for accessing operating-system level functionalities.
//
// DefaultOS fills in the directory paths, the inotify state and an empty
// ReleaseInfo map; Init fills in the host-specific details noted on the fields
// below.
type OS struct {
	// Directories
	CacheDir string // Cache directory (e.g. /var/cache/incus/).
	LogDir string // Log directory (e.g. /var/log/incus/).
	RunDir string // Runtime directory (e.g. /run/incus/).
	VarDir string // Data directory (e.g. /var/lib/incus/).

	// Daemon environment
	Architectures []int // Cache of detected system architectures
	BackingFS string // Backing filesystem of $INCUS_DIR/containers
	ExecPath string // Absolute path to the daemon
	IdmapSet *idmap.Set // Information about user/group ID mapping
	InotifyWatch InotifyInfo // Inotify state; DefaultOS sets Fd to -1 and Targets to an empty map
	LxcPath string // Path to the $INCUS_DIR/containers directory
	MockMode bool // If true some APIs will be mocked (for testing)
	Nodev bool // NOTE(review): not assigned in this file; presumably reports a nodev restriction, confirm
	RunningInUserNS bool // Result of linux.RunningInUserNS(), set by Init
	Hostname string // Hostname as returned by os.Hostname(), set by Init

	// Privilege dropping
	UnprivUser string // First of "incus" / "nobody" that user.Lookup resolves (set by Init); empty if neither does
	UnprivUID uint32 // UID of UnprivUser
	UnprivGroup string // First of "incus" / "nogroup" that user.LookupGroup resolves (set by Init); empty if neither does
	UnprivGID uint32 // GID of UnprivGroup

	// Apparmor features
	// NOTE(review): presumably populated by initAppArmor, which Init calls; confirm.
	AppArmorAdmin bool
	AppArmorAvailable bool
	AppArmorConfined bool
	AppArmorStacked bool
	AppArmorStacking bool

	// Cgroup features
	CGInfo cgroup.Info // Result of cgroup.GetInfo(), set by Init after cgroup.Init()

	// Kernel features
	CloseRange bool // CloseRange indicates support for the close_range syscall.
	ContainerCoreScheduling bool // ContainerCoreScheduling indicates LXC and kernel support for core scheduling.
	CoreScheduling bool // CoreScheduling indicates support for core scheduling syscalls.
	IdmappedMounts bool // IdmappedMounts indicates kernel support for VFS idmap.
	NativeTerminals bool // NativeTerminals indicates support for the TIOCGPTPEER ioctl.
	NetnsGetifaddrs bool // NetnsGetifaddrs indicates support for NETLINK_GET_STRICT_CHK.
	PidFds bool // PidFds indicates support for PID fds.
	PidFdsThread bool // PidFdsThread indicates support for thread PID fds.
	PidFdSetns bool // PidFdSetns indicates support for setns through PID fds.
	SeccompListenerAddfd bool // SeccompListenerAddfd indicates support for passing a new FD to a process through seccomp notify.
	SeccompListener bool // SeccompListener indicates support for seccomp notify.
	SeccompListenerContinue bool // SeccompListenerContinue indicates support for continuing syscalls through seccomp notify.
	UeventInjection bool // UeventInjection indicates support for injecting uevents to a specific netns.
	UnprivBinfmt bool // UnprivBinfmt indicates support for mounting binfmt_misc inside of a user namespace.
	VFS3Fscaps bool // VFS3Fscaps indicates support for v3 filesystem capabilities.

	// LXC features
	LXCFeatures map[string]bool // Boolean flags keyed by string; not populated in this file

	// OS info
	ReleaseInfo map[string]string // Result of osarch.GetOSRelease(), set by Init (empty map from DefaultOS until then)
	KernelVersion version.DottedVersion // Parsed from Uname.Release by Init; left at its zero value if parsing fails
	Uname *linux.Utsname // Result of linux.Uname(), set by Init
	BootTime time.Time // Built from the btime entry of /proc/stat by Init; zero if no positive btime was found
	IncusOS bool // True when /var/lib/incus-os/ exists at Init time
}
// DefaultOS allocates a new OS value carrying only its defaults: the four
// directory paths, an inotify state with Fd set to -1 and an empty target map,
// and an empty release-info map. Every other field is left at its zero value.
func DefaultOS() *OS {
	inst := new(OS)

	// Directory locations come from the internal path helpers.
	inst.CacheDir = internalUtil.CachePath()
	inst.LogDir = internalUtil.LogPath()
	inst.RunDir = internalUtil.RunPath()
	inst.VarDir = internalUtil.VarPath()

	// Inotify state starts with Fd at -1 and an empty target table.
	inst.InotifyWatch.Fd = -1
	inst.InotifyWatch.Targets = map[string]*InotifyTargetInfo{}

	// Start with an empty, writable release-info map.
	inst.ReleaseInfo = map[string]string{}

	return inst
}
// Init probes the host and fills in the internal data structures of s.
//
// In order, it runs initDirs, detects the system architectures, records the
// containers path and its backing filesystem, picks the unprivileged user and
// group, loads the idmap, the daemon executable path, the user-namespace state
// and the hostname, runs initAppArmor, initialises cgroup information, and
// finally records the OS release info, uname, kernel version, IncusOS marker
// and boot time.
//
// It returns the warnings produced by initAppArmor. A non-nil error is
// returned (with nil warnings) as soon as any mandatory step fails. A failure
// to detect the backing filesystem is only logged, and an unparsable kernel
// release simply leaves KernelVersion at its zero value.
func (s *OS) Init() ([]cluster.Warning, error) {
	var dbWarnings []cluster.Warning

	err := s.initDirs()
	if err != nil {
		return nil, err
	}

	s.Architectures, err = localUtil.GetArchitectures()
	if err != nil {
		return nil, err
	}

	s.LxcPath = filepath.Join(s.VarDir, "containers")

	// A detection failure is logged but does not abort initialisation.
	s.BackingFS, err = linux.DetectFilesystem(s.LxcPath)
	if err != nil {
		logger.Error("Error detecting backing fs", logger.Ctx{"err": err})
	}

	// Detect if it is possible to run daemons as an unprivileged user and group.
	// The first candidate that resolves wins; if none does, the fields stay empty.
	for _, userName := range []string{"incus", "nobody"} {
		u, err := user.Lookup(userName)
		if err != nil {
			continue
		}

		uid, err := strconv.ParseUint(u.Uid, 10, 32)
		if err != nil {
			return nil, err
		}

		s.UnprivUser = userName
		s.UnprivUID = uint32(uid)
		break
	}

	for _, groupName := range []string{"incus", "nogroup"} {
		g, err := user.LookupGroup(groupName)
		if err != nil {
			continue
		}

		gid, err := strconv.ParseUint(g.Gid, 10, 32)
		if err != nil {
			return nil, err
		}

		s.UnprivGroup = groupName
		s.UnprivGID = uint32(gid)
		break
	}

	s.IdmapSet = getIdmapset()
	s.ExecPath = localUtil.GetExecPath()
	s.RunningInUserNS = linux.RunningInUserNS()

	s.Hostname, err = os.Hostname()
	if err != nil {
		return nil, err
	}

	dbWarnings = s.initAppArmor()

	cgroup.Init()
	s.CGInfo = cgroup.GetInfo()

	// Fill in the OS release info.
	osInfo, err := osarch.GetOSRelease()
	if err != nil {
		return nil, err
	}

	s.ReleaseInfo = osInfo

	uname, err := linux.Uname()
	if err != nil {
		return nil, err
	}

	s.Uname = uname

	// The kernel version is best-effort: keep the zero value if the release
	// string cannot be parsed.
	kernelVersion, err := version.Parse(uname.Release)
	if err == nil {
		s.KernelVersion = *kernelVersion
	}

	s.IncusOS = util.PathExists("/var/lib/incus-os/")

	// Fill in the boot time.
	out, err := os.ReadFile("/proc/stat")
	if err != nil {
		return nil, err
	}

	btime := int64(0)
	for _, line := range strings.Split(string(out), "\n") {
		if !strings.HasPrefix(line, "btime ") {
			continue
		}

		// A "btime " line without a value yields a single field; report it
		// as an error rather than panicking on the index below.
		fields := strings.Fields(line)
		if len(fields) < 2 {
			return nil, errors.New("malformed btime entry in /proc/stat")
		}

		btime, err = strconv.ParseInt(fields[1], 10, 64)
		if err != nil {
			return nil, err
		}

		break
	}

	// Only record a boot time when a positive btime was found.
	if btime > 0 {
		s.BootTime = time.Unix(btime, 0)
	}

	return dbWarnings, nil
}
// InitStorage sets up the storage layer once it has been mounted by running
// initStorageDirs and passing its error, if any, back to the caller.
func (s *OS) InitStorage() error {
	if err := s.initStorageDirs(); err != nil {
		return err
	}

	return nil
}
// GetUnixSocket returns the full path to the unix.socket file that this daemon
// is listening on: the value of the INCUS_SOCKET environment variable when it
// is set to a non-empty string, otherwise "unix.socket" inside VarDir. Used by
// tests.
func (s *OS) GetUnixSocket() string {
	if override := os.Getenv("INCUS_SOCKET"); override != "" {
		return override
	}

	return filepath.Join(s.VarDir, "unix.socket")
}
// getIdmapset selects the idmap set to use and logs each map it considers.
//
// The system map for the root user is tried first; when one is available only
// its POSIX ranges are kept. When there is no system map (including when
// subids are unsupported) the map of the current process is used instead: its
// entries are rebased and a submap is split from it, first with the larger
// request and then with the smaller one.
//
// nil is returned whenever no usable map can be produced; the reason is logged.
func getIdmapset() *idmap.Set {
	// logSet prints a heading followed by one line per entry of the map.
	logSet := func(heading string, set *idmap.Set) {
		logger.Info(heading)
		for _, line := range set.ToLXCString() {
			logger.Infof(" - %s", line)
		}
	}

	// First choice: the system map.
	systemMap, err := idmap.NewSetFromSystem("root")
	if err != nil && !errors.Is(err, idmap.ErrSubidUnsupported) {
		logger.Error("Unable to parse system idmap", logger.Ctx{"err": err})
		return nil
	}

	if systemMap != nil {
		logSet("System idmap (root user):", systemMap)

		// Restrict the system map to its POSIX ranges.
		posixMap := systemMap.FilterPOSIX()
		if posixMap == nil {
			logger.Warn("No valid subuid/subgid map, only privileged containers will be functional")
			return nil
		}

		logSet("Selected idmap:", posixMap)
		return posixMap
	}

	// Second choice: the map of the current process.
	processMap, err := idmap.NewSetFromCurrentProcess()
	if err != nil {
		logger.Error("Unable to parse process idmap", logger.Ctx{"err": err})
		return nil
	}

	// Move each NSID into HostID and reset NSID to 0 (to turn into a usable map).
	for i := range processMap.Entries {
		processMap.Entries[i].HostID = processMap.Entries[i].NSID
		processMap.Entries[i].NSID = 0
	}

	logSet("Current process idmap:", processMap)

	// Ask for the larger chunk first. A "no suitable submap" result is not
	// fatal here; it just means the smaller request is attempted next.
	chosen, err := processMap.Split(65536, 1000000000, 1000000, -1)
	if err != nil && !errors.Is(err, idmap.ErrNoSuitableSubmap) {
		logger.Error("Unable to split a submap", logger.Ctx{"err": err})
		return nil
	}

	if chosen != nil {
		logSet("Selected idmap:", chosen)
		return chosen
	}

	// Fall back to the smaller chunk; this time any failure ends the search.
	chosen, err = processMap.Split(65536, 1000000000, 65536, -1)
	switch {
	case err == nil:
		// A submap was found; report it below.
	case errors.Is(err, idmap.ErrNoSuitableSubmap):
		logger.Warn("Not enough uid/gid available, only privileged containers will be functional")
		return nil
	default:
		logger.Error("Unable to split a submap", logger.Ctx{"err": err})
		return nil
	}

	logSet("Selected idmap:", chosen)
	return chosen
}
|