1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376
|
// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package sysmsg provides a stub signal handler and a communication protocol
// between stub threads and the Sentry.
//
// Note that this package is allowlisted for use of sync/atomic.
//
// +checkalignedignore
package sysmsg
import (
"fmt"
"strings"
"sync/atomic"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/platform"
)
// LINT.IfChange
// Per-thread stack layout:
//
//	*------------*
//	| guard page |
//	|------------|
//	|            |
//	|  sysstack  |
//	|            |
//	*------------*
//	| guard page |
//	|------------|
//	|            |
//	|     ^      |
//	|    / \     |
//	|     |      |
//	|  altstack  |
//	|------------|
//	|   sysmsg   |
//	*------------*
const (
// PerThreadMemSize is the size of a per-thread memory region (8 pages).
PerThreadMemSize = 8 * hostarch.PageSize
// GuardSize is the size of an unmapped region which is placed right
// before the signal stack (1 page).
GuardSize = hostarch.PageSize
// PerThreadPrivateStackOffset is the offset that StackAddrToSyshandlerStack
// adds to its argument to reach the private stack; it equals GuardSize.
// NOTE(review): presumably the private stack is the "sysstack" of the layout
// diagram and the offset skips the first guard page - confirm.
PerThreadPrivateStackOffset = GuardSize
// PerThreadPrivateStackSize is the size of the private stack (2 pages).
PerThreadPrivateStackSize = 2 * hostarch.PageSize
// PerThreadSharedStackSize is the size of a per-thread shared stack region
// (4 pages).
PerThreadSharedStackSize = 4 * hostarch.PageSize
// PerThreadSharedStackOffset is the offset of the shared stack (4 pages).
// NOTE(review): presumably measured from the start of the per-thread memory
// region - confirm against the code that maps the region.
PerThreadSharedStackOffset = 4 * hostarch.PageSize
// MsgOffsetFromSharedStack is the offset of the Msg structure from the
// address of the shared stack, as applied by StackAddrToMsg and
// MsgToStackAddr. It evaluates to 3 pages, and
// PerThreadSharedStackOffset + MsgOffsetFromSharedStack equals
// PerThreadMemSize - hostarch.PageSize.
MsgOffsetFromSharedStack = PerThreadMemSize - hostarch.PageSize - PerThreadSharedStackOffset
// SpinningQueueMemSize is the size of a spinning queue memory region
// (1 page).
SpinningQueueMemSize = hostarch.PageSize
)
// StackAddrToMsg translates a stack address into the address of the sysmsg
// structure that belongs to it, which lies MsgOffsetFromSharedStack bytes
// above sp. It is the inverse of MsgToStackAddr.
func StackAddrToMsg(sp uintptr) uintptr {
	msg := sp + MsgOffsetFromSharedStack
	return msg
}
// StackAddrToSyshandlerStack translates a stack address into the address of
// the syshandler stack. The result is the address immediately past the
// private stack area, i.e. PerThreadPrivateStackOffset +
// PerThreadPrivateStackSize bytes above sp.
func StackAddrToSyshandlerStack(sp uintptr) uintptr {
	privateStackStart := sp + PerThreadPrivateStackOffset
	return privateStackStart + PerThreadPrivateStackSize
}
// MsgToStackAddr translates the address of a sysmsg structure back into the
// start address of its stack, which lies MsgOffsetFromSharedStack bytes below
// msg. It is the inverse of StackAddrToMsg.
func MsgToStackAddr(msg uintptr) uintptr {
	sp := msg - MsgOffsetFromSharedStack
	return sp
}
// ThreadState holds the state of a sysmsg thread as a 32-bit word. All
// accessors below operate on that word through sync/atomic.
type ThreadState uint32

// Set atomically stores state into s.
func (s *ThreadState) Set(state ThreadState) {
	word := (*uint32)(s)
	atomic.StoreUint32(word, uint32(state))
}

// CompareAndSwap atomically replaces the value of s with state if, and only
// if, s currently holds old. It reports whether the swap took place.
func (s *ThreadState) CompareAndSwap(old, state ThreadState) bool {
	word := (*uint32)(s)
	swapped := atomic.CompareAndSwapUint32(word, uint32(old), uint32(state))
	return swapped
}

// Get atomically loads and returns the value currently held by s.
//
//go:nosplit
func (s *ThreadState) Get() ThreadState {
	raw := atomic.LoadUint32((*uint32)(s))
	return ThreadState(raw)
}
const (
// ThreadStateNone means that the thread is executing the user workload.
// It is the zero value of ThreadState (0).
ThreadStateNone ThreadState = iota
// ThreadStateDone means that the last event has been handled and the stub
// thread can be resumed (1).
ThreadStateDone
// ThreadStatePrep means that syshandler started filling the sysmsg struct
// (2).
ThreadStatePrep
// ThreadStateAsleep means that this thread fell asleep because there were
// not enough contexts to process in the context queue (3).
ThreadStateAsleep
// ThreadStateInitializing is only set once at sysmsg thread creation time. It
// is used to tell the signal handler that the thread does not yet have a
// context (4).
ThreadStateInitializing
)
// Msg contains the current state of the sysmsg thread.
//
// NOTE(review): this definition sits between the LINT.IfChange and
// LINT.ThenChange(sysmsg.h) markers of this file, so it presumably mirrors a
// structure declared in sysmsg.h - confirm before adding, removing or
// reordering fields.
type Msg struct {
// The next batch of fields is used to call the syshandler stub
// function. A system call can be replaced with a function call. When
// a function call is executed, it can't change the current process
// stack, so it needs to save stack and instruction registers, switch
// on its syshandler stack and call the jmp instruction to the syshandler
// address.
//
// Self is a pointer to itself in a process address space.
Self uint64
// RetAddr is a return address from the syshandler function.
RetAddr uint64
// Syshandler is an address of the syshandler function.
Syshandler uint64
// SyshandlerStack is an address of the thread syshandler stack.
SyshandlerStack uint64
// AppStack is a value of the stack register before calling the syshandler
// function.
AppStack uint64
// interrupt is non-zero if there is a postponed interrupt.
interrupt uint32
// State indicates to the sentry what the sysmsg thread is doing at a given
// moment. Use its Get/Set/CompareAndSwap methods for atomic access.
State ThreadState
// Context is a pointer to the ThreadContext struct that the current sysmsg
// thread is processing. Init resets it to 0.
Context uint64
// FaultJump is the size of a faulted instruction.
FaultJump int32
// Err is the error value with which the {sig|sys}handler crashes the stub
// thread (see sysmsg.h:__panic). ConvertSysmsgErr interprets it as a
// StubError; Init resets it to 0.
Err int32
// ErrAdditional is an error value that gives additional information
// about the panic. Init resets it to 0.
ErrAdditional int32
// Line is the code line on which the {sig|sys}handler crashed the stub thread
// (see sysmsg.h:panic). Init sets it to -1.
Line int32
// Debug is a variable to use to get visibility into the stub from the sentry.
Debug uint64
// ThreadID is the ID of the sysmsg thread. It is assigned by Init.
ThreadID uint32
}
// ContextState records why a context has exited back to the sentry; it holds
// ContextStateNone while the context is running or ready to run. The value is
// a 32-bit word accessed through sync/atomic by the methods below.
type ContextState uint32

// Set atomically stores state into s.
func (s *ContextState) Set(state ContextState) {
	word := (*uint32)(s)
	atomic.StoreUint32(word, uint32(state))
}

// Get atomically loads and returns the value currently held by s.
//
//go:nosplit
func (s *ContextState) Get() ContextState {
	raw := atomic.LoadUint32((*uint32)(s))
	return ContextState(raw)
}
// Context State types. The values are consecutive, starting at 0.
const (
// ContextStateNone means that the context is either running in the user task
// or is ready to run in the user task. It is the zero value (0).
ContextStateNone ContextState = iota
// ContextStateSyscall means that a syscall event is triggered from the
// sighandler (1).
ContextStateSyscall
// ContextStateFault means that there is a fault event that needs to be
// handled (2).
ContextStateFault
// ContextStateSyscallTrap means that a syscall event is triggered from
// a function call (syshandler) (3).
ContextStateSyscallTrap
// ContextStateSyscallCanBePatched means that the syscall can be replaced
// with a function call (4).
ContextStateSyscallCanBePatched
// ContextStateInvalid is an invalid state that the sentry should never see
// (5).
ContextStateInvalid
)
const (
// MaxFPStateLen is the largest possible FPState that we will save, in bytes.
// It is the length of the ThreadContext.FPState array.
// Note: This value was chosen to be able to fit ThreadContext into one page.
MaxFPStateLen uint32 = 3584
// AllocatedSizeofThreadContextStruct defines how much memory (in bytes) to
// allocate for one instance of ThreadContext.
// We over allocate the memory for it because:
// - The next instance needs to be aligned to 64 bytes for purposes of xsave.
// - It's nice to align it to the page boundary.
AllocatedSizeofThreadContextStruct uintptr = 4096
)
// ThreadContext contains the current context of the sysmsg thread. The struct
// facilitates switching contexts by allowing the sentry to switch pointers to
// this struct as it needs to.
//
// NOTE(review): this definition sits between the LINT.IfChange and
// LINT.ThenChange(sysmsg.h) markers of this file, so it presumably mirrors a
// structure declared in sysmsg.h - confirm before adding, removing or
// reordering fields.
type ThreadContext struct {
// FPState is a region of memory where:
// - syshandler saves FPU state to using xsave/fxsave
// - sighandler copies FPU state to from ucontext->uc_mcontext.fpregs
// Note that xsave requires this region of memory to be 64 byte aligned;
// therefore allocations of ThreadContext must be too.
FPState [MaxFPStateLen]byte
// FPStateChanged is set to a non-zero value when the stub thread needs to
// restore FPState because the sentry changed it. Init sets it to 1.
FPStateChanged uint64
// Regs is the context's GP register set. The {sig|sys}handler will save and
// restore the user app's registers here. Init clears it.
Regs linux.PtraceRegs
// SignalInfo is the siginfo struct. Init clears it.
SignalInfo linux.SignalInfo
// Signo is the signal that the stub is requesting the sentry to handle.
// Init resets it to 0.
Signo int64
// State indicates the reason why the context has exited back to the sentry.
// Init sets it to ContextStateNone.
State ContextState
// Interrupt is set to indicate that this context has been interrupted.
Interrupt uint32
// ThreadID is the ID of the sysmsg thread that's currently working on the
// context. Init sets it to the initial thread ID it is given.
ThreadID uint32
// LastThreadID is the ID of the previous sysmsg thread that ran the context
// (not the one currently working on it). This field is used by sysmsg threads
// to detect whether fpstate may have changed since the last time they ran a
// context.
LastThreadID uint32
// SentryFastPath is used to indicate to the stub thread that the sentry
// goroutine used for this thread context is busy-polling for a response
// instead of using FUTEX_WAIT.
SentryFastPath uint32
// AckedTime is used by sysmsg threads to signal to the sentry that this context
// has been picked up from the context queue and is actively being worked on.
// The stub thread puts down the timestamp at which it has started processing
// this context.
AckedTime uint64
// StateChangedTime is the time when the ThreadContext.State changed, as
// recorded by the stub thread when it gave it back to the sentry
// (the sentry does not populate this field except to reset it).
StateChangedTime uint64
// TLS is a pointer to a thread local storage.
// It is only populated on ARM64.
TLS uint64
// Debug is a variable to use to get visibility into the stub from the sentry.
Debug uint64
}
// StubError values represent known stub-thread failure modes.
// Since these errors originate from the stub threads, look at
// sysmsg.h:stub_error. Msg.ConvertSysmsgErr translates them into
// human-readable errors.
type StubError int32
// Known StubError values. They are numbered consecutively starting at
// 0x0bad0000.
const (
// StubErrorBadSysmsg indicates sysmsg->self did not match sysmsg
// (0x0bad0000).
StubErrorBadSysmsg StubError = 0x0bad0000 + iota
// StubErrorBadThreadState indicates sysmsg->state was invalid (0x0bad0001).
StubErrorBadThreadState
// StubErrorBadSpinningQueueDecref indicates stubs removed more threads
// from spinning queue than were put in (0x0bad0002).
StubErrorBadSpinningQueueDecref
// StubErrorArchPrctl indicates an error when calling arch_prctl
// (0x0bad0003).
StubErrorArchPrctl
// StubErrorFutex indicates an error when calling futex (0x0bad0004).
StubErrorFutex
// StubErrorBadContextID indicates a context received from the context
// queue was of unexpected value (0x0bad0005).
StubErrorBadContextID
// StubErrorFpStateBadHeader indicates that the floating point state
// header did not match the expected value (0x0bad0006).
StubErrorFpStateBadHeader
)
// LINT.ThenChange(sysmsg.h)
// Init prepares the message for use by the sysmsg thread identified by
// threadID: the error-reporting fields are cleared (Err and ErrAdditional to
// 0, Line to -1), ThreadID is recorded and Context is reset to 0. All other
// fields are left as they are.
func (m *Msg) Init(threadID uint32) {
	// Clear whatever a previous stub failure may have reported.
	m.Err, m.ErrAdditional, m.Line = 0, 0, -1

	// Bind the message to its thread; no context is attached yet.
	m.ThreadID, m.Context = threadID, 0
}
// Init prepares the ThreadContext for use: FPStateChanged is set to 1, the
// register set, signal number and signal info are cleared, State becomes
// ContextStateNone and ThreadID is set to initialThreadID. All other fields
// are left as they are.
func (c *ThreadContext) Init(initialThreadID uint32) {
	var (
		blankRegs linux.PtraceRegs
		blankInfo linux.SignalInfo
	)

	c.FPStateChanged = 1
	c.Regs = blankRegs
	c.Signo = 0
	c.SignalInfo = blankInfo
	c.State = ContextStateNone
	c.ThreadID = initialThreadID
}
// ConvertSysmsgErr converts m.Err to platform.ContextError.
//
// The returned error always carries Errno unix.EPERM. Its Err field is a
// message of the form "systrap stub thread failure: <reason> (failed on line
// N; <m.String()>)", where <reason> is selected by interpreting m.Err as a
// StubError. Values that are not a known StubError are reported as an unknown
// reason together with the raw value.
//
// m.Err, m.ErrAdditional and m.Line are read with atomic loads. m.Err is read
// exactly once so that the value printed for an unknown reason is guaranteed
// to be the value that selected that branch.
func (m *Msg) ConvertSysmsgErr() *platform.ContextError {
	const prefix = "systrap stub thread failure:"

	suffix := fmt.Sprintf("(failed on line %d; %s)", atomic.LoadInt32(&m.Line), m.String())

	// Snapshot the error words once; every branch below works on these copies.
	code := atomic.LoadInt32(&m.Err)
	extra := atomic.LoadInt32(&m.ErrAdditional)

	var reason string
	switch StubError(code) {
	case StubErrorBadSysmsg:
		reason = "sysmsg->self did not match sysmsg during sig/sys-handler"
	case StubErrorBadThreadState:
		reason = "sysmsg->state was invalid during sys-handler"
	case StubErrorBadSpinningQueueDecref:
		reason = "imbalanced use of spinning queue"
	case StubErrorArchPrctl:
		reason = fmt.Sprintf("arch_prctl error=0x%x", extra)
	case StubErrorFutex:
		reason = fmt.Sprintf("futex error=0x%x", extra)
	case StubErrorBadContextID:
		reason = fmt.Sprintf("unexpected context ID (%d) from context queue", extra)
	case StubErrorFpStateBadHeader:
		reason = fmt.Sprintf("FP state context magic header (%d) does not match expected FPSIMD_MAGIC", extra)
	default:
		reason = fmt.Sprintf("unknown reason (0x%x) (possible shared memory corruption)", code)
	}

	return &platform.ContextError{
		Err:   fmt.Errorf("%s %s %s", prefix, reason, suffix),
		Errno: unix.EPERM,
	}
}
// String implements fmt.Stringer. It renders the message address, thread
// state, error code, line, debug word, application stack, context pointer and
// thread ID on a single line for diagnostics.
func (m *Msg) String() string {
	parts := []string{
		fmt.Sprintf("sysmsg.Msg{msg: %x state %d", m.Self, m.State),
		fmt.Sprintf(" err %x line %d debug %x", m.Err, m.Line, m.Debug),
		fmt.Sprintf(" app stack %x", m.AppStack),
		fmt.Sprintf(" context %x", m.Context),
		fmt.Sprintf(" ThreadID %d", m.ThreadID),
		"}",
	}
	return strings.Join(parts, "")
}
// String implements fmt.Stringer. It renders the context state, the fault
// address and syscall number taken from SignalInfo, the instruction and stack
// pointers, the full register set and the bookkeeping fields on a single line
// for diagnostics. State and Debug are read with atomic loads.
func (c *ThreadContext) String() string {
	var out strings.Builder

	// emit appends one formatted fragment to the output.
	emit := func(format string, args ...interface{}) {
		fmt.Fprintf(&out, format, args...)
	}

	emit("sysmsg.ThreadContext{state %d", c.State.Get())
	emit(" fault addr %x syscall %d", c.SignalInfo.Addr(), c.SignalInfo.Syscall())
	emit(" ip %x sp %x", c.Regs.InstructionPointer(), c.Regs.StackPointer())
	emit(" FPStateChanged %d Regs %+v", c.FPStateChanged, c.Regs)
	emit(" Interrupt %d", c.Interrupt)
	emit(" ThreadID %d LastThreadID %d", c.ThreadID, c.LastThreadID)
	emit(" SentryFastPath %d Acked %d", c.SentryFastPath, c.AckedTime)
	emit(" signo: %d, siginfo: %+v", c.Signo, c.SignalInfo)
	emit(" debug %d", atomic.LoadUint64(&c.Debug))
	out.WriteString("}")

	return out.String()
}
|