1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
|
package libcontainer
import (
"errors"
"fmt"
"io/fs"
"os"
"strconv"
"strings"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/internal/userns"
"github.com/opencontainers/runc/libcontainer/utils"
)
// mountSourceType indicates what type of file descriptor is being returned. It
// is used to tell rootfs_linux.go whether or not to use move_mount(2) to
// install the mount.
//
// It is a string type and is carried in the "type" JSON field of mountSource.
type mountSourceType string
// Known mountSourceType values.
const (
	// mountSourceOpenTree marks an open_tree(2)-style file descriptor, which
	// has to be installed using move_mount(2).
	mountSourceOpenTree mountSourceType = "open_tree"
	// mountSourcePlain marks a plain file descriptor that can be mounted
	// through /proc/thread-self/fd.
	mountSourcePlain mountSourceType = "plain-open"
)
// mountSource pairs an already-opened mount source file with the
// mountSourceType that says how that file has to be installed.
type mountSource struct {
	// Type selects how file is mounted (see the mountSourceType constants).
	Type mountSourceType `json:"type"`
	// file is the open handle to the mount source. It is excluded from JSON
	// encoding.
	file *os.File `json:"-"`
}
// mountError holds an error from a failed mount or unmount operation, along
// with the arguments of the failed call so that Error() can report them.
type mountError struct {
	// op is the name of the operation that failed ("mount", "move_mount" or
	// "unmount" for the helpers in this file).
	op string
	// source is the mount source path; reported as "src=" when non-empty.
	source string
	// srcFile is the optional file-descriptor-based mount source. Its type and
	// fd number are reported only when source is non-empty as well.
	srcFile *mountSource
	// target is the mount destination path; always reported as "dst=".
	target string
	// dstFd is the optional procfs path of the destination file descriptor;
	// reported as "dstFd=" when non-empty.
	dstFd string
	// flags are the mount/unmount flags; reported symbolically when non-zero.
	flags uintptr
	// data is the mount data string; reported when non-empty.
	data string
	// err is the underlying error, exposed through Unwrap().
	err error
}
// int32plus is a collection of int types with >=32 bits. It is the type
// constraint of stringifyMountFlags, so that mount flags can be passed in
// whichever integer type the caller happens to hold them in.
type int32plus interface {
	int | uint | int32 | uint32 | int64 | uint64 | uintptr
}
// stringifyMountFlags renders mount(2) flags as a human-readable string for
// use in error messages.
//
// Every known MS_* flag whose bits are all set in flags is emitted by name, in
// ascending bit order. Any bits left over that have no known name are appended
// as a single hexadecimal "0x..." entry. Entries are separated by "|"; a zero
// flags value yields the empty string.
func stringifyMountFlags[Int int32plus](flags Int) string {
	// Table of known flags, ordered by bit position.
	known := []struct {
		label string
		mask  Int
	}{
		{"MS_RDONLY", unix.MS_RDONLY},
		{"MS_NOSUID", unix.MS_NOSUID},
		{"MS_NODEV", unix.MS_NODEV},
		{"MS_NOEXEC", unix.MS_NOEXEC},
		{"MS_SYNCHRONOUS", unix.MS_SYNCHRONOUS},
		{"MS_REMOUNT", unix.MS_REMOUNT},
		{"MS_MANDLOCK", unix.MS_MANDLOCK},
		{"MS_DIRSYNC", unix.MS_DIRSYNC},
		{"MS_NOSYMFOLLOW", unix.MS_NOSYMFOLLOW},
		// No (1 << 9) flag.
		{"MS_NOATIME", unix.MS_NOATIME},
		{"MS_NODIRATIME", unix.MS_NODIRATIME},
		{"MS_BIND", unix.MS_BIND},
		{"MS_MOVE", unix.MS_MOVE},
		{"MS_REC", unix.MS_REC},
		// MS_VERBOSE was deprecated and swapped to MS_SILENT.
		{"MS_SILENT", unix.MS_SILENT},
		{"MS_POSIXACL", unix.MS_POSIXACL},
		{"MS_UNBINDABLE", unix.MS_UNBINDABLE},
		{"MS_PRIVATE", unix.MS_PRIVATE},
		{"MS_SLAVE", unix.MS_SLAVE},
		{"MS_SHARED", unix.MS_SHARED},
		{"MS_RELATIME", unix.MS_RELATIME},
		// MS_KERNMOUNT (1 << 22) is internal to the kernel.
		{"MS_I_VERSION", unix.MS_I_VERSION},
		{"MS_STRICTATIME", unix.MS_STRICTATIME},
		{"MS_LAZYTIME", unix.MS_LAZYTIME},
	}

	var names []string
	// unnamed starts out as all requested bits; each recognised flag is
	// cleared from it, leaving only the bits we have no name for.
	unnamed := flags
	for _, entry := range known {
		if flags&entry.mask != entry.mask {
			continue
		}
		names = append(names, entry.label)
		unnamed &^= entry.mask
	}

	// Report whatever could not be named in 0x... format.
	if unnamed != 0 {
		names = append(names, "0x"+strconv.FormatUint(uint64(unnamed), 16))
	}
	return strings.Join(names, "|")
}
// Error formats the failed operation together with its arguments, in the form
//
//	<op> [src=..., [srcType=..., srcFd=..., ]]dst=...[, dstFd=...][, flags=...][, data=...]: <err>
//
// Optional parts are omitted when the corresponding field is empty/zero. The
// srcFile details are only included when source is non-empty as well.
func (e *mountError) Error() string {
	var msg strings.Builder

	msg.WriteString(e.op)
	msg.WriteString(" ")

	if e.source != "" {
		msg.WriteString("src=" + e.source + ", ")
		if sf := e.srcFile; sf != nil {
			msg.WriteString("srcType=" + string(sf.Type) + ", ")
			msg.WriteString("srcFd=" + strconv.Itoa(int(sf.file.Fd())) + ", ")
		}
	}

	msg.WriteString("dst=" + e.target)
	if e.dstFd != "" {
		msg.WriteString(", dstFd=" + e.dstFd)
	}
	if e.flags != uintptr(0) {
		msg.WriteString(", flags=" + stringifyMountFlags(e.flags))
	}
	if e.data != "" {
		msg.WriteString(", data=" + e.data)
	}

	msg.WriteString(": " + e.err.Error())
	return msg.String()
}
// Unwrap returns the underlying error (the err field), so that errors.Is and
// errors.As can see through a *mountError to the error that caused it.
// This is a convention used by Go 1.13+ standard library.
func (e *mountError) Unwrap() error {
	return e.err
}
// mount is a simple unix.Mount wrapper, returning an error with more context
// in case it failed.
func mount(source, target, fstype string, flags uintptr, data string) error {
return mountViaFds(source, nil, target, "", fstype, flags, data)
}
// mountViaFds is a unix.Mount wrapper which uses srcFile instead of source,
// and dstFd instead of target, unless those are empty.
//
// If srcFile is non-nil and flags does not contain MS_REMOUNT, mountViaFds
// will mount it according to the mountSourceType of the file descriptor:
// mountSourceOpenTree is installed with move_mount(2) (fstype, flags and data
// are not passed to the kernel in that case), anything else goes through
// classic mount(2) using a /proc/thread-self/fd/NN path as the source. If
// MS_REMOUNT is set, srcFile is ignored (with a debug log message).
//
// The dstFd argument, if non-empty, is expected to be in the form of a path to
// an opened file descriptor on procfs (i.e. "/proc/thread-self/fd/NN").
//
// If a file descriptor is used instead of a source or a target path, the
// corresponding path is only used to add context to an error in case the mount
// operation has failed.
//
// On failure a *mountError describing the operation and all of its arguments
// is returned; on success the result is nil.
func mountViaFds(source string, srcFile *mountSource, target, dstFd, fstype string, flags uintptr, data string) error {
	// MS_REMOUNT and srcFile don't make sense together.
	if srcFile != nil && flags&unix.MS_REMOUNT != 0 {
		logrus.Debugf("mount source passed along with MS_REMOUNT -- ignoring srcFile")
		srcFile = nil
	}

	// Prefer the destination fd path, fall back to the plain target path.
	dst := target
	if dstFd != "" {
		dst = dstFd
	}

	var (
		op  string
		err error
	)
	if srcFile != nil && srcFile.Type == mountSourceOpenTree {
		op = "move_mount"
		// Use dst rather than dstFd so that a caller which only supplied
		// target still gets a valid destination, as documented above.
		// MOVE_MOUNT_T_SYMLINKS is needed so that a /proc/.../fd/NN magic-link
		// destination is followed to the real mountpoint.
		err = unix.MoveMount(int(srcFile.file.Fd()), "",
			unix.AT_FDCWD, dst,
			unix.MOVE_MOUNT_F_EMPTY_PATH|unix.MOVE_MOUNT_T_SYMLINKS)
	} else {
		src := source
		if srcFile != nil {
			// For classic mount(2) the source fd is referenced through a
			// /proc/thread-self/... path, so we need a safe handle to
			// /proc/thread-self which stays valid until the mount is done.
			var closer utils.ProcThreadSelfCloser
			src, closer = utils.ProcThreadSelfFd(srcFile.file.Fd())
			defer closer()
		}
		op = "mount"
		err = unix.Mount(src, dst, fstype, flags, data)
	}

	if err != nil {
		return &mountError{
			op:      op,
			source:  source,
			srcFile: srcFile,
			target:  target,
			dstFd:   dstFd,
			flags:   flags,
			data:    data,
			err:     err,
		}
	}
	return nil
}
// unmount calls unix.Unmount on target with the given flags. A failure is
// returned as a *mountError recording the target and flags; success yields
// nil.
func unmount(target string, flags int) error {
	err := unix.Unmount(target, flags)
	if err == nil {
		return nil
	}
	return &mountError{
		op:     "unmount",
		target: target,
		flags:  uintptr(flags),
		err:    err,
	}
}
// syscallMode translates Go's portable fs.FileMode into the mode bits used by
// system calls: the permission bits plus the setuid, setgid and sticky bits.
// All other Go mode bits are dropped.
//
// Copy from https://cs.opensource.google/go/go/+/refs/tags/go1.20.7:src/os/file_posix.go;l=61-75
func syscallMode(i fs.FileMode) (o uint32) {
	// Go mode bit -> corresponding syscall mode bit.
	// No mapping for Go's ModeTemporary (plan9 only).
	special := [...]struct {
		goBit  fs.FileMode
		sysBit uint32
	}{
		{fs.ModeSetuid, unix.S_ISUID},
		{fs.ModeSetgid, unix.S_ISGID},
		{fs.ModeSticky, unix.S_ISVTX},
	}

	o = uint32(i.Perm())
	for _, bit := range special {
		if i&bit.goBit != 0 {
			o |= bit.sysBit
		}
	}
	return o
}
// mountFd creates a "mount source fd" (either through open_tree(2) or just
// open(O_PATH)) based on the provided configuration. This function must be
// called from within the container's mount namespace.
//
// In the case of idmapped mount configurations, the returned mount source will
// be an open_tree(2) file with MOUNT_ATTR_IDMAP applied. For other
// bind-mounts, it will be an O_PATH. Mounts that are not bind-mounts are
// rejected with an error.
//
// nsHandles is used to obtain the user namespace for id-mappings that are
// given as UID/GID mappings rather than as a userns path. It may be nil, in
// which case a temporary set of handles is used and released before
// returning.
//
// On error the returned mountSource is nil and no file descriptor opened by
// this function is left open.
//
// This helper is only intended to be used by goCreateMountSources.
func mountFd(nsHandles *userns.Handles, m *configs.Mount) (*mountSource, error) {
	if !m.IsBind() {
		return nil, errors.New("new mount api: only bind-mounts are supported")
	}
	if nsHandles == nil {
		nsHandles = new(userns.Handles)
		defer nsHandles.Release()
	}

	// Ideally, we would use OPEN_TREE_CLONE for everything, because we can
	// be sure that the file descriptor cannot be used to escape outside of
	// the mount root. Unfortunately, OPEN_TREE_CLONE is far more expensive
	// than open(2) because it requires doing mounts inside a new anonymous
	// mount namespace. So we use open(2) for standard bind-mounts, and
	// OPEN_TREE_CLONE when we need to set mount attributes here.
	//
	// While passing open(2)'d paths from the host rootfs isn't exactly the
	// safest thing in the world, the files will not survive across
	// execve(2) and "runc init" is non-dumpable so it should not be
	// possible for a malicious container process to gain access to the
	// file descriptors. We also don't do any of this for "runc exec",
	// lessening the risk even further.
	if !m.IsIDMapped() {
		plainFile, err := os.OpenFile(m.Source, unix.O_PATH|unix.O_CLOEXEC, 0)
		if err != nil {
			return nil, err
		}
		return &mountSource{
			Type: mountSourcePlain,
			file: plainFile,
		}, nil
	}

	flags := uint(unix.OPEN_TREE_CLONE | unix.OPEN_TREE_CLOEXEC)
	if m.Flags&unix.MS_REC == unix.MS_REC {
		flags |= unix.AT_RECURSIVE
	}
	fd, err := unix.OpenTree(unix.AT_FDCWD, m.Source, flags)
	if err != nil {
		return nil, &os.PathError{Op: "open_tree(OPEN_TREE_CLONE)", Path: m.Source, Err: err}
	}
	mountFile := os.NewFile(uintptr(fd), m.Source)

	// Until the id-mapping has been applied, the cloned tree is ours to clean
	// up: close it on every error path below instead of leaving the fd open
	// until the garbage collector gets around to it.
	installed := false
	defer func() {
		if !installed {
			_ = mountFile.Close()
		}
	}()

	// Configure the id mapping.
	var usernsFile *os.File
	if m.IDMapping.UserNSPath == "" {
		usernsFile, err = nsHandles.Get(userns.Mapping{
			UIDMappings: m.IDMapping.UIDMappings,
			GIDMappings: m.IDMapping.GIDMappings,
		})
		if err != nil {
			return nil, fmt.Errorf("failed to create userns for %s id-mapping: %w", m.Source, err)
		}
	} else {
		usernsFile, err = os.Open(m.IDMapping.UserNSPath)
		if err != nil {
			return nil, fmt.Errorf("failed to open existing userns for %s id-mapping: %w", m.Source, err)
		}
	}
	defer usernsFile.Close()

	setAttrFlags := uint(unix.AT_EMPTY_PATH)
	// If the mount has "ridmap" set, we apply the id-mapping recursively.
	// Keeping this separate from MS_REC allows you to create "rbind" mounts
	// where only the top-level mount has an idmapping. I'm not sure why you'd
	// want that, but still...
	if m.IDMapping.Recursive {
		setAttrFlags |= unix.AT_RECURSIVE
	}
	if err := unix.MountSetattr(int(mountFile.Fd()), "", setAttrFlags, &unix.MountAttr{
		Attr_set:  unix.MOUNT_ATTR_IDMAP,
		Userns_fd: uint64(usernsFile.Fd()),
	}); err != nil {
		extraMsg := ""
		if errors.Is(err, unix.EINVAL) {
			extraMsg = " (maybe the filesystem used doesn't support idmap mounts on this kernel?)"
		}
		return nil, fmt.Errorf("failed to set MOUNT_ATTR_IDMAP on %s: %w%s", m.Source, err, extraMsg)
	}

	// Ownership of mountFile passes to the returned mountSource.
	installed = true
	return &mountSource{
		Type: mountSourceOpenTree,
		file: mountFile,
	}, nil
}
|