1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
|
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package kernel
import (
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux/errno"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
"gvisor.dev/gvisor/pkg/sentry/loader"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/syserr"
)
// errNoSyscalls is the error LoadTaskImage returns when LookupSyscallTable
// has no table for the OS/architecture pair reported by the loader. It is
// built with errno.ENOEXEC.
var errNoSyscalls = syserr.New("no syscall table found", errno.ENOEXEC)
// Auxmap contains miscellaneous data for the task, stored as values of any
// type keyed by string.
type Auxmap map[string]any
// TaskImage is the subset of a task's data that is provided by the loader.
//
// A TaskImage is produced either by Kernel.LoadTaskImage (a fresh image for a
// loaded binary) or by TaskImage.Fork (a duplicate of an existing image).
//
// +stateify savable
type TaskImage struct {
	// Name is the thread name set by the prctl(PR_SET_NAME) system call.
	Name string

	// Arch is the architecture-specific context (registers, etc.). Fork
	// always gives the copy its own forked context.
	Arch *arch.Context64

	// MemoryManager is the task's address space. It is set to nil by release
	// after the image's user count on it has been dropped.
	MemoryManager *mm.MemoryManager

	// fu implements futexes in the address space. Fork shares it with the
	// copy when the address space is shared, and otherwise gives the copy a
	// manager forked from the kernel's. release sets it to nil.
	fu *futex.Manager

	// st is the task's syscall table. The state tag names syscallTableInfo
	// as the form used when the field is saved.
	st *SyscallTable `state:".(syscallTableInfo)"`
}
// release drops every resource owned by the TaskImage. The task calls it
// when it execs into a new TaskImage.
func (image *TaskImage) release() {
	// Each pointer is cleared as its resource is dropped, so that a save
	// taken after release never follows a pointer to an object that may no
	// longer be valid.
	if m := image.MemoryManager; m != nil {
		m.DecUsers(context.Background())
		image.MemoryManager = nil
	}
	image.fu = nil
}
// Fork duplicates image and returns the copy. The copy always receives its
// own forked arch.Context64 and keeps the original's name and syscall table.
//
// When shareAddressSpace is true the copy uses the same MemoryManager (taking
// an additional user on it, if it is non-nil) and the same futex manager as
// the original. When it is false the copy receives a forked MemoryManager,
// initially a duplicate of the original's, and a futex manager forked from
// k's; an error from forking the MemoryManager is returned unchanged together
// with a nil image.
func (image *TaskImage) Fork(ctx context.Context, k *Kernel, shareAddressSpace bool) (*TaskImage, error) {
	child := &TaskImage{
		Name: image.Name,
		Arch: image.Arch.Fork(),
		st:   image.st,
	}

	if !shareAddressSpace {
		// Independent address space: duplicate the original's mappings.
		forked, err := image.MemoryManager.Fork(ctx)
		if err != nil {
			return nil, err
		}
		child.MemoryManager = forked
		child.fu = k.futexes.Fork()
		return child, nil
	}

	// Shared address space: both images point at the same MemoryManager and
	// futex manager.
	child.MemoryManager = image.MemoryManager
	if child.MemoryManager != nil && !child.MemoryManager.IncUsers() {
		// Shouldn't be possible since image.MemoryManager should be a
		// counted user.
		panic(fmt.Sprintf("TaskImage.Fork called with userless TaskImage.MemoryManager"))
	}
	child.fu = image.fu
	return child, nil
}
// Arch returns the arch.Context64 held by t's task image.
//
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) Arch() *arch.Context64 {
	ac := t.image.Arch
	return ac
}
// MemoryManager returns the MemoryManager held by t's task image. The
// pointer is handed back as-is: no additional reference is taken on the
// returned MM.
//
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) MemoryManager() *mm.MemoryManager {
	m := t.image.MemoryManager
	return m
}
// SyscallTable returns the syscall table held by t's task image.
//
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) SyscallTable() *SyscallTable {
	table := t.image.st
	return table
}
// Stack returns a view of the userspace stack, built from t's architecture
// context, with t's MemoryManager as the IO backend and the context's
// current stack value as the bottom address.
//
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) Stack() *arch.Stack {
	ac := t.Arch()
	io := t.MemoryManager()
	bottom := hostarch.Addr(ac.Stack())
	return &arch.Stack{
		Arch:   ac,
		IO:     io,
		Bottom: bottom,
	}
}
// LoadTaskImage loads the file described by args into a brand-new TaskImage.
//
// The caller does not need to set args.MemoryManager; it is overwritten with
// a freshly created MemoryManager before loading. An error from the loader is
// returned unchanged, and errNoSyscalls is returned when no syscall table
// exists for the loaded binary's OS and architecture.
func (k *Kernel) LoadTaskImage(ctx context.Context, args loader.LoadArgs) (*TaskImage, *syserr.Error) {
	// Create the user address space the binary is loaded into. The deferred
	// DecUsers runs on every return path; on success it is balanced by the
	// IncUsers below, so the returned image still holds a user on the MM.
	addrSpace := mm.NewMemoryManager(k, k, k.SleepForAddressSpaceActivation)
	defer addrSpace.DecUsers(ctx)
	args.MemoryManager = addrSpace

	targetOS, archCtx, threadName, loadErr := loader.Load(ctx, args, k.extraAuxv, k.vdso)
	if loadErr != nil {
		return nil, loadErr
	}

	// Find the syscall table for what was just loaded. A miss means that the
	// ELF binary does not match the architecture.
	table, found := LookupSyscallTable(targetOS, archCtx.Arch())
	if !found {
		return nil, errNoSyscalls
	}

	if !addrSpace.IncUsers() {
		panic("Failed to increment users count on new MM")
	}

	loaded := &TaskImage{
		Name:          threadName,
		Arch:          archCtx,
		MemoryManager: addrSpace,
		fu:            k.futexes.Fork(),
		st:            table,
	}
	return loaded, nil
}
|