// Copyright (c) Contributors to the Apptainer project, established as
// Apptainer a Series of LF Projects LLC.
// For website terms of use, trademark policy, privacy policy and other
// project policies see https://lfprojects.org/policies
// Copyright (c) 2018-2022, Sylabs Inc. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.
package image
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/apptainer/apptainer/internal/pkg/util/fs"
"github.com/apptainer/apptainer/internal/pkg/util/user"
"github.com/apptainer/apptainer/pkg/sylog"
"github.com/apptainer/apptainer/pkg/util/fs/lock"
)
// Image and partition format identifiers. Values start at 0x1000 and
// increase by one in declaration order.
const (
	// SQUASHFS constant for squashfs format
	SQUASHFS = iota + 0x1000
	// EXT3 constant for ext3 format
	EXT3
	// SANDBOX constant for directory format
	SANDBOX
	// SIF constant for sif format
	SIF
	// ENCRYPTSQUASHFS constant for encrypted squashfs format
	ENCRYPTSQUASHFS
	// RAW constant for raw format
	RAW
	// GOCRYPTFSSQUASHFS constant for gocryptfs encrypted squashfs format
	GOCRYPTFSSQUASHFS
)
// Usage is a bit field describing how an image or a partition may be
// used. Each flag below occupies a distinct bit, so flags can be
// combined with a bitwise OR and tested with a bitwise AND.
type Usage uint8
const (
	// RootFsUsage defines flag for image/partition
	// usable as root filesystem.
	RootFsUsage = Usage(1 << iota)
	// OverlayUsage defines flag for image/partition
	// usable as overlay.
	OverlayUsage
	// DataUsage defines flag for image/partition
	// usable as data.
	DataUsage
)
const (
	// RootFs partition name
	RootFs = "!__rootfs__!"
	// NOTE(review): launchString and bufferSize are not referenced in
	// the visible part of this file; presumably they are used by the
	// format detection code elsewhere in the package - confirm.
	launchString = " run-singularity"
	bufferSize = 2048
	// emptyFd has all bits set; Init stores it in Image.Fd as a
	// placeholder until an image file has actually been opened.
	emptyFd = ^uintptr(0)
)
// debugError is the error type format initializers use to signal
// "this is not my format" as opposed to a genuine image format error.
// Init only logs such errors at debug level and moves on to the next
// registered format.
type debugError string

// Error implements the error interface by returning the message itself.
func (e debugError) Error() string {
	return string(e)
}

// debugErrorf builds a debugError from a format string and its
// arguments, following fmt.Sprintf formatting rules.
func debugErrorf(format string, a ...interface{}) error {
	return debugError(fmt.Sprintf(format, a...))
}
// readOnlyFilesystemError represents an error returned by
// read-only filesystem image when attempted to be opened
// as writable.
type readOnlyFilesystemError struct {
	s string
}

// Error implements the error interface and returns the stored message.
func (e *readOnlyFilesystemError) Error() string {
	return e.s
}

// IsReadOnlyFilesytem reports whether err is, or wraps, a read-only
// filesystem error. A nil err yields false.
//
// The error chain is inspected with errors.As so that the check keeps
// working when an intermediate layer has wrapped the original error
// (for instance with fmt.Errorf and %w).
func IsReadOnlyFilesytem(err error) bool {
	var roErr *readOnlyFilesystemError
	return errors.As(err, &roErr)
}
// ErrUnknownFormat represents an unknown image format error. Init
// returns it when none of the registered formats accepted the image.
var ErrUnknownFormat = errors.New("image format not recognized")
// registeredFormats lists the image formats known to this package,
// pairing a human readable name (used in debug logs) with its
// implementation. Init probes the entries in slice order and stops at
// the first format whose initializer accepts the image, so the order
// of this list is significant.
var registeredFormats = []struct {
	name string
	format format
}{
	{"sandbox", &sandboxFormat{}},
	{"sif", &sifFormat{}},
	{"squashfs", &squashfsFormat{}},
	{"ext3", &ext3Format{}},
}
// format describes the interface that an image format type must implement.
type format interface {
	// openMode receives the requested writable flag and returns the
	// flags Init passes to os.OpenFile when opening the image.
	openMode(bool) int
	// initializer inspects the opened image (and the os.FileInfo of its
	// file) and fills in the Image. Init treats a returned debugError
	// as "not this format" and tries the next one, tolerates a
	// read-only filesystem error, and aborts on any other error.
	initializer(*Image, os.FileInfo) error
	// lock is called by Init once the format has been detected; a
	// non-nil error makes Init close the image and fail.
	lock(*Image) error
}
// Section identifies and locates a data section in image object.
type Section struct {
	// Name of the section; used in lock related log messages.
	Name string `json:"name"`
	// Size of the section; lockSection uses it as the length of the
	// locked byte range.
	Size uint64 `json:"size"`
	// Offset of the section; lockSection uses it as the start of the
	// locked byte range.
	Offset uint64 `json:"offset"`
	// ID of the section.
	// NOTE(review): meaning is format specific and not visible here.
	ID uint32 `json:"id"`
	// Type of the section; compared against the format constants
	// (e.g. ENCRYPTSQUASHFS, GOCRYPTFSSQUASHFS) by EncryptedRootFs.
	Type uint32 `json:"type"`
	// AllowedUsage is the set of Usage flags this section may serve.
	AllowedUsage Usage `json:"allowed_usage"`
}
// Image describes an image object, an image is composed of one
// or more partitions (eg: container root filesystem, overlay),
// image format like SIF contains descriptors pointing to chunk of
// data, chunks position and size are stored as image sections.
type Image struct {
	// Partitions found in the image; filtered by the Get*Partitions
	// helpers according to their AllowedUsage.
	Partitions []Section `json:"partitions"`
	// Sections found in the image.
	// NOTE(review): populated outside the visible code, presumably by
	// the format initializers - confirm.
	Sections []Section `json:"sections"`
	// Path is the resolved absolute path of the image, set by Init.
	Path string `json:"path"`
	// Name is the base name of Path, set by Init.
	Name string `json:"name"`
	// Source is set by Init to "/proc/self/fd/<fd>" for the opened file.
	Source string `json:"source"`
	// Type of the image.
	// NOTE(review): presumably one of the format constants (SQUASHFS,
	// EXT3, ...) assigned by the format initializer - confirm.
	Type int `json:"type"`
	// File is the opened image file; it is excluded from JSON and can
	// be rebuilt from Fd and Path with ReInit.
	File *os.File `json:"-"`
	// Fd is the file descriptor of the opened image, or emptyFd until
	// Init has opened a file.
	Fd uintptr `json:"fd"`
	// Writable tells whether the image was opened for writing.
	Writable bool `json:"writable"`
	// Usage holds the usages enabled for this image; Init defaults it
	// to RootFsUsage.
	Usage Usage `json:"usage"`
}
// ReInit rebuilds the File field from Fd and Path when it is missing,
// which is the case after an Image has been transferred between
// processes as JSON (File itself is not serialized). It is a no-op
// when File is already set or when Path is empty.
func (i *Image) ReInit() {
	if i.File != nil || i.Path == "" {
		return
	}
	i.File = os.NewFile(i.Fd, i.Path)
}
// AuthorizedPath checks if image is in a path supplied in paths.
//
// Every entry of paths is cleaned and has its symlinks resolved before
// being compared with the image path. The image is authorized when its
// path is equal to an entry or located below it. The comparison is
// done on a path separator boundary so that an allowed directory such
// as "/opt/images" does not also authorize "/opt/images-other/x.sif",
// which a plain string prefix test would accept.
//
// It returns an error as soon as one of the entries examined cannot be
// resolved; entries following a match are not examined.
func (i *Image) AuthorizedPath(paths []string) (bool, error) {
	sep := string(filepath.Separator)

	for _, path := range paths {
		match, err := filepath.EvalSymlinks(filepath.Clean(path))
		if err != nil {
			return false, fmt.Errorf("failed to resolve path %s: %w", path, err)
		}
		if i.Path == match {
			return true, nil
		}
		// Normalize to exactly one trailing separator; this also keeps
		// the filesystem root ("/") usable as an allowed path.
		dir := strings.TrimSuffix(match, sep) + sep
		if strings.HasPrefix(i.Path, dir) {
			return true, nil
		}
	}

	return false, nil
}
// AuthorizedOwner checks whether the image is owned by any user from the supplied users list.
//
// The owner is read from the stat information of the opened image
// file and compared with the UID of each named user in turn. An error
// is returned when the file cannot be stat'ed, when the platform does
// not expose ownership information, or when one of the user names
// examined cannot be looked up.
func (i *Image) AuthorizedOwner(owners []string) (bool, error) {
	fileinfo, err := i.File.Stat()
	if err != nil {
		return false, fmt.Errorf("failed to get stat for %s: %w", i.Path, err)
	}

	// Checked assertion: report an error instead of panicking if the
	// underlying stat data is not a *syscall.Stat_t.
	st, ok := fileinfo.Sys().(*syscall.Stat_t)
	if !ok {
		return false, fmt.Errorf("failed to get owner information for %s", i.Path)
	}
	uid := st.Uid

	for _, owner := range owners {
		pw, err := user.GetPwNam(owner)
		if err != nil {
			return false, fmt.Errorf("failed to retrieve user information for %s: %w", owner, err)
		}
		if pw.UID == uid {
			return true, nil
		}
	}

	return false, nil
}
// AuthorizedGroup checks whether the image is owned by any group from the supplied groups list.
//
// The owning group is read from the stat information of the opened
// image file and compared with the GID of each named group in turn.
// An error is returned when the file cannot be stat'ed, when the
// platform does not expose ownership information, or when one of the
// group names examined cannot be looked up.
func (i *Image) AuthorizedGroup(groups []string) (bool, error) {
	fileinfo, err := i.File.Stat()
	if err != nil {
		return false, fmt.Errorf("failed to get stat for %s: %w", i.Path, err)
	}

	// Checked assertion: report an error instead of panicking if the
	// underlying stat data is not a *syscall.Stat_t.
	st, ok := fileinfo.Sys().(*syscall.Stat_t)
	if !ok {
		return false, fmt.Errorf("failed to get group information for %s", i.Path)
	}
	gid := st.Gid

	for _, group := range groups {
		gr, err := user.GetGrNam(group)
		if err != nil {
			return false, fmt.Errorf("failed to retrieve group information for %s: %w", group, err)
		}
		if gr.GID == gid {
			return true, nil
		}
	}

	return false, nil
}
// getPartitions collects the partitions whose AllowedUsage shares at
// least one flag with usage. When the image itself is not enabled for
// any of the requested usages the result is empty. The returned slice
// is never nil and the error is always nil.
func (i *Image) getPartitions(usage Usage) ([]Section, error) {
	matched := []Section{}

	if i.Usage&usage != 0 {
		for idx := range i.Partitions {
			if part := i.Partitions[idx]; part.AllowedUsage&usage != 0 {
				matched = append(matched, part)
			}
		}
	}

	return matched, nil
}
// GetAllPartitions returns the partitions usable as root filesystem,
// overlay or data. Like the other partition getters, it returns an
// empty list when the image Usage enables none of these usages.
func (i *Image) GetAllPartitions() ([]Section, error) {
	return i.getPartitions(RootFsUsage | OverlayUsage | DataUsage)
}
// GetRootFsPartition returns a pointer to the first root filesystem
// partition of the image. It fails when the partitions cannot be
// listed or when the image has no root filesystem partition.
func (i *Image) GetRootFsPartition() (*Section, error) {
	rootParts, err := i.GetRootFsPartitions()
	if err != nil {
		return nil, err
	}
	if len(rootParts) == 0 {
		return nil, fmt.Errorf("no root filesystem found")
	}
	first := &rootParts[0]
	return first, nil
}
// GetRootFsPartitions returns the partitions of the image that are
// allowed to be used as root filesystem. The list is empty when the
// image Usage does not include RootFsUsage.
func (i *Image) GetRootFsPartitions() ([]Section, error) {
	return i.getPartitions(RootFsUsage)
}
// GetOverlayPartitions returns the partitions of the image that are
// allowed to be used as overlay. The list is empty when the image
// Usage does not include OverlayUsage.
func (i *Image) GetOverlayPartitions() ([]Section, error) {
	return i.getPartitions(OverlayUsage)
}
// GetDataPartitions returns the partitions of the image that are
// allowed to be used as data. The list is empty when the image Usage
// does not include DataUsage.
func (i *Image) GetDataPartitions() ([]Section, error) {
	return i.getPartitions(DataUsage)
}
// EncryptedRootFs reports the kind of encryption used by the root
// filesystem of the image. Root filesystem partitions are examined in
// order and the first encrypted one decides the result: "encryptfs"
// for an ENCRYPTSQUASHFS partition, "gocryptfs" for a
// GOCRYPTFSSQUASHFS partition. An empty string is returned when no
// root filesystem partition is encrypted.
func (i *Image) EncryptedRootFs() (encryptionType string, err error) {
	parts, err := i.GetRootFsPartitions()
	if err != nil {
		return "", fmt.Errorf("while getting root FS partitions: %v", err)
	}

	for idx := range parts {
		switch parts[idx].Type {
		case ENCRYPTSQUASHFS:
			return "encryptfs", nil
		case GOCRYPTFSSQUASHFS:
			return "gocryptfs", nil
		}
	}

	return "", nil
}
// writeLocks tracks write locks for the current process. It maps an
// image path to the sections lockSection has write-locked.
// NOTE(review): no removal of entries and no synchronization is
// visible in this part of the file - confirm that callers never lock
// concurrently.
var writeLocks = make(map[string][]Section)
// readLocks tracks read locks for the current process. It maps an
// image path to the sections lockSection has read-locked.
var readLocks = make(map[string][]Section)
// lockSection places a byte-range lock covering section on the image
// file descriptor. A writable image gets a write lock, a read-only
// image gets a read lock.
//
// Acquiring the lock never fails against locks already held by the
// same process, so the sections locked by this process are recorded in
// writeLocks/readLocks and checked here: a section may not be locked
// for writing if this process already reads or writes it, nor locked
// for reading if this process already writes it.
//
// A lock held by another process is reported as an error. A
// filesystem without lock support is tolerated: the situation is
// logged at verbose level and nil is returned.
func lockSection(i *Image, section Section) error {
	// alreadyLocked reports whether the exact same byte range of this
	// image is present in the given lock table.
	alreadyLocked := func(table map[string][]Section) bool {
		for _, held := range table[i.Path] {
			if held.Offset == section.Offset && held.Size == section.Size {
				return true
			}
		}
		return false
	}

	br := lock.NewByteRange(int(i.Fd), int64(section.Offset), int64(section.Size))

	var err error
	if i.Writable {
		if err = br.Lock(); err == nil {
			// sadly we need to track same write locks from
			// the same process because a process may place
			// as many write lock without any error
			if alreadyLocked(readLocks) {
				return fmt.Errorf("can't open %s for writing, already used for reading by this process", i.Path)
			}
			if alreadyLocked(writeLocks) {
				return fmt.Errorf("can't open %s for writing, already used for writing by this process", i.Path)
			}
			writeLocks[i.Path] = append(writeLocks[i.Path], section)
		}
	} else if err = br.RLock(); err == nil {
		if alreadyLocked(writeLocks) {
			return fmt.Errorf("can't open %s for reading, already used for writing by this process", i.Path)
		}
		readLocks[i.Path] = append(readLocks[i.Path], section)
	}

	switch err {
	case lock.ErrByteRangeAcquired:
		if i.Writable {
			return fmt.Errorf("can't open %s for writing, currently in use by another process", i.Path)
		}
		return fmt.Errorf("can't open %s for reading, currently in use for writing by another process", i.Path)
	case lock.ErrLockNotSupported:
		// ENOLCK means that the underlying filesystem doesn't support
		// lock, so we simply ignore the error in order to allow ext3
		// images located on the underlying filesystem to run correctly
		// and advertise user in log
		sylog.Verbosef("Could not set lock on %s section %q, underlying filesystem seems to not support lock", i.Path, section.Name)
		sylog.Verbosef("Data corruptions may occur if %s is open for writing by multiple processes", i.Path)
		return nil
	}

	return err
}
// ResolvePath turns path into an absolute path and then resolves any
// symbolic link it contains. It fails when the absolute path cannot
// be determined or when symlink resolution fails.
func ResolvePath(path string) (string, error) {
	absolute, absErr := fs.Abs(path)
	if absErr != nil {
		return "", fmt.Errorf("failed to get absolute path: %s", absErr)
	}

	resolved, evalErr := filepath.EvalSymlinks(absolute)
	if evalErr != nil {
		return "", fmt.Errorf("failed to retrieve path for %s: %s", path, evalErr)
	}

	return resolved, nil
}
// Init initializes an image object based on given path.
//
// The path is resolved to an absolute, symlink-free path which must be
// readable by the current user. Each registered format is then probed
// in order: the image is opened with the mode requested by the format
// and handed to the format initializer. The first format that accepts
// the image wins; its sections are locked and the image is returned.
//
// writable is only a request: when a format asks for read-write access
// but the path is not writable, the image is opened read-only and
// Image.Writable is set to false.
//
// The returned error may be a read-only filesystem error (see
// IsReadOnlyFilesytem) together with a non-nil image; any other
// non-nil error comes with a nil image. ErrUnknownFormat is returned
// when no registered format recognized the image.
func Init(path string, writable bool) (*Image, error) {
	sylog.Debugf("Image format detection")
	resolvedPath, err := ResolvePath(path)
	if err != nil {
		return nil, err
	}
	if !fs.IsReadable(resolvedPath) {
		return nil, fmt.Errorf("%s is not readable by the current user, check permissions", resolvedPath)
	}
	// Fd stays at emptyFd until a format has been detected below.
	img := &Image{
		Path: resolvedPath,
		Name: filepath.Base(resolvedPath),
		Fd: emptyFd,
		Usage: RootFsUsage,
	}
	for _, rf := range registeredFormats {
		sylog.Debugf("Check for %s image format", rf.name)
		// reset for every probe since a previous iteration may have
		// downgraded it to false
		img.Writable = writable
		mode := rf.format.openMode(writable)
		if mode&os.O_RDWR != 0 {
			if !fs.IsWritable(resolvedPath) {
				sylog.Debugf("Opening %s in read-only mode: no write permissions", path)
				mode = os.O_RDONLY
				img.Writable = false
			}
		}
		img.File, err = os.OpenFile(resolvedPath, mode, 0)
		if err != nil {
			// the open error is dropped, the next format is tried
			continue
		}
		fileinfo, err := img.File.Stat()
		if err != nil {
			_ = img.File.Close()
			return nil, err
		}
		// readOnlyFilesystemError is allowed here and passed back
		// to the caller because there is basically no error with
		// the image format just a mismatch with writable parameter,
		// so the decision is delegated to the caller
		initErr := rf.format.initializer(img, fileinfo)
		if _, ok := initErr.(debugError); ok {
			// not this format: close the file and probe the next one
			sylog.Debugf("%s format initializer returned: %v", rf.name, initErr)
			_ = img.File.Close()
			continue
		} else if initErr != nil && !IsReadOnlyFilesytem(initErr) {
			_ = img.File.Close()
			return nil, initErr
		}
		sylog.Debugf("%s image format detected", rf.name)
		// NOTE(review): fcntl(2) documents FD_CLOEXEC as the flag for
		// F_SETFD, whereas O_CLOEXEC is an open(2) flag with a different
		// value; confirm which effect is intended here before changing
		// it, other code may rely on the current behavior.
		if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, img.File.Fd(), syscall.F_SETFD, syscall.O_CLOEXEC); err != 0 {
			sylog.Warningf("failed to set O_CLOEXEC flags on image")
		}
		img.Source = fmt.Sprintf("/proc/self/fd/%d", img.File.Fd())
		img.Fd = img.File.Fd()
		if err := rf.format.lock(img); err != nil {
			_ = img.File.Close()
			return nil, err
		}
		// initErr is either nil or a read-only filesystem error
		return img, initErr
	}
	return nil, ErrUnknownFormat
}
|