File: sysmsg.go

package info (click to toggle)
golang-gvisor-gvisor 0.0~20240729.0-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 21,276 kB
  • sloc: asm: 3,361; ansic: 1,197; cpp: 348; makefile: 92; python: 89; sh: 83
file content (376 lines) | stat: -rw-r--r-- 13,930 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package sysmsg provides a stub signal handler and a communication protocol
// between stub threads and the Sentry.
//
// Note that this package is allowlisted for use of sync/atomic.
//
// +checkalignedignore
package sysmsg

import (
	"fmt"
	"strings"
	"sync/atomic"

	"golang.org/x/sys/unix"
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/hostarch"
	"gvisor.dev/gvisor/pkg/sentry/platform"
)

// LINT.IfChange
// Per-thread stack layout:
//
// *------------*
// | guard page |
// |------------|
// |            |
// |  sysstack  |
// |            |
// *------------*
// | guard page |
// |------------|
// |            |
// |     ^      |
// |    / \     |
// |     |      |
// |  altstack  |
// |------------|
// |   sysmsg   |
// *------------*
const (
	// PerThreadMemSize is the size of a per-thread memory region.
	PerThreadMemSize = 8 * hostarch.PageSize
	// GuardSize is the size of an unmapped region which is placed right
	// before the signal stack.
	//
	// PerThreadPrivateStackOffset and PerThreadPrivateStackSize are the offset
	// and the size of the per-thread private stack; the offset is equal to
	// GuardSize. StackAddrToSyshandlerStack adds both of them to its argument
	// to compute the syshandler stack address.
	GuardSize                   = hostarch.PageSize
	PerThreadPrivateStackOffset = GuardSize
	PerThreadPrivateStackSize   = 2 * hostarch.PageSize
	// PerThreadSharedStackSize is the size of a per-thread shared stack region
	// and PerThreadSharedStackOffset is the offset of that region. Both are
	// four pages.
	PerThreadSharedStackSize   = 4 * hostarch.PageSize
	PerThreadSharedStackOffset = 4 * hostarch.PageSize
	// MsgOffsetFromSharedStack is the offset of the Msg structure from the
	// shared stack address (see StackAddrToMsg and MsgToStackAddr). With the
	// values above it evaluates to three pages.
	MsgOffsetFromSharedStack = PerThreadMemSize - hostarch.PageSize - PerThreadSharedStackOffset

	// SpinningQueueMemSize is the size of a spinning queue memory region.
	SpinningQueueMemSize = hostarch.PageSize
)

// StackAddrToMsg returns the address of the sysmsg structure that belongs to
// the stack starting at sp. It is the inverse of MsgToStackAddr.
func StackAddrToMsg(sp uintptr) uintptr {
	msgAddr := sp + MsgOffsetFromSharedStack
	return msgAddr
}

// StackAddrToSyshandlerStack returns the address of the syshandler stack for
// the given sp: sp advanced past the private stack offset and the private
// stack size.
func StackAddrToSyshandlerStack(sp uintptr) uintptr {
	// Distance from sp to the end of the private stack region.
	const privateStackEnd = PerThreadPrivateStackOffset + PerThreadPrivateStackSize
	return sp + privateStackEnd
}

// MsgToStackAddr returns the start address of the stack whose sysmsg
// structure lives at msg. It is the inverse of StackAddrToMsg.
func MsgToStackAddr(msg uintptr) uintptr {
	stackStart := msg - MsgOffsetFromSharedStack
	return stackStart
}

// ThreadState is used to store a state of the sysmsg thread. Its Set,
// CompareAndSwap and Get methods access the underlying uint32 via sync/atomic.
type ThreadState uint32

// Set atomically stores state as the new value.
func (s *ThreadState) Set(state ThreadState) {
	word := (*uint32)(s)
	atomic.StoreUint32(word, uint32(state))
}

// CompareAndSwap atomically replaces the value with state if it currently
// equals old, and reports whether the replacement happened.
func (s *ThreadState) CompareAndSwap(old, state ThreadState) bool {
	word := (*uint32)(s)
	swapped := atomic.CompareAndSwapUint32(word, uint32(old), uint32(state))
	return swapped
}

// Get atomically loads and returns the current value.
//
//go:nosplit
func (s *ThreadState) Get() ThreadState {
	word := (*uint32)(s)
	return ThreadState(atomic.LoadUint32(word))
}

// Thread states. The values are assigned by iota in declaration order,
// starting at zero. They sit inside the LINT.IfChange/ThenChange(sysmsg.h)
// section of this file, so any change here must be mirrored in sysmsg.h.
const (
	// ThreadStateNone means that the thread is executing the user workload.
	ThreadStateNone ThreadState = iota
	// ThreadStateDone means that the last event has been handled and the stub
	// thread can be resumed.
	ThreadStateDone
	// ThreadStatePrep means that syshandler started filling the sysmsg struct.
	ThreadStatePrep
	// ThreadStateAsleep means that this thread fell asleep because there were
	// not enough contexts to process in the context queue.
	ThreadStateAsleep
	// ThreadStateInitializing is only set once at sysmsg thread creation time. It
	// is used to tell the signal handler that the thread does not yet have a
	// context.
	ThreadStateInitializing
)

// Msg contains the current state of the sysmsg thread.
//
// The definition sits inside the LINT.IfChange/ThenChange(sysmsg.h) section of
// this file, so field order, types and sizes must be kept in sync with
// sysmsg.h.
type Msg struct {
	// The next batch of fields is used to call the syshandler stub
	// function. A system call can be replaced with a function call. When
	// a function call is executed, it can't change the current process
	// stack, so it needs to save stack and instruction registers, switch
	// on its syshandler stack and call the jmp instruction to the syshandler
	// address.
	//
	// Self is a pointer to itself in a process address space.
	Self uint64
	// RetAddr is a return address from the syshandler function.
	RetAddr uint64
	// Syshandler is an address of the syshandler function.
	Syshandler uint64
	// SyshandlerStack is an address of the thread syshandler stack.
	SyshandlerStack uint64
	// AppStack is a value of the stack register before calling the syshandler
	// function.
	AppStack uint64
	// interrupt is non-zero if there is a postponed interrupt.
	interrupt uint32
	// State indicates to the sentry what the sysmsg thread is doing at a given
	// moment.
	State ThreadState
	// Context is a pointer to the ThreadContext struct that the current sysmsg
	// thread is processing. Init resets it to zero.
	Context uint64

	// FaultJump is the size of a faulted instruction.
	FaultJump int32
	// Err is the error value with which the {sig|sys}handler crashes the stub
	// thread (see sysmsg.h:__panic). ConvertSysmsgErr interprets it as a
	// StubError and reads it with an atomic load.
	Err int32
	// ErrAdditional is an error value that gives additional information
	// about the panic.
	ErrAdditional int32
	// Line is the code line on which the {sig|sys}handler crashed the stub thread
	// (see sysmsg.h:panic). Init sets it to -1.
	Line int32
	// Debug is a variable to use to get visibility into the stub from the sentry.
	Debug uint64
	// ThreadID is the ID of the sysmsg thread.
	ThreadID uint32
}

// ContextState defines the reason the context has exited back to the sentry,
// or ContextStateNone if running/ready-to-run. Its Set and Get methods access
// the underlying uint32 via sync/atomic.
type ContextState uint32

// Set atomically stores state as the new value.
func (s *ContextState) Set(state ContextState) {
	word := (*uint32)(s)
	atomic.StoreUint32(word, uint32(state))
}

// Get atomically loads and returns the current value.
//
//go:nosplit
func (s *ContextState) Get() ContextState {
	word := (*uint32)(s)
	return ContextState(atomic.LoadUint32(word))
}

// Context State types. The values are assigned by iota in declaration order,
// starting at zero, and sit inside the LINT.IfChange/ThenChange(sysmsg.h)
// section of this file.
const (
	// ContextStateNone means that the context is either running in the user
	// task or is ready to run in the user task.
	ContextStateNone ContextState = iota
	// ContextStateSyscall means that a syscall event is triggered from the
	// sighandler.
	ContextStateSyscall
	// ContextStateFault means that there is a fault event that needs to be
	// handled.
	ContextStateFault
	// ContextStateSyscallTrap means that a syscall event is triggered from
	// a function call (syshandler).
	ContextStateSyscallTrap
	// ContextStateSyscallCanBePatched means that the syscall can be replaced
	// with a function call.
	ContextStateSyscallCanBePatched
	// ContextStateInvalid is an invalid state that the sentry should never see.
	ContextStateInvalid
)

const (
	// MaxFPStateLen is the largest possible FPState that we will save, in
	// bytes. It is the length of the ThreadContext.FPState array.
	// Note: This value was chosen to be able to fit ThreadContext into one page.
	MaxFPStateLen uint32 = 3584

	// AllocatedSizeofThreadContextStruct defines how much memory to allocate for
	// one instance of ThreadContext.
	// We over-allocate the memory for it because:
	//   - The next instance needs to be aligned to 64 bytes for the purposes
	//     of xsave.
	//   - It's nice to align it to the page boundary.
	AllocatedSizeofThreadContextStruct uintptr = 4096
)

// ThreadContext contains the current context of the sysmsg thread. The struct
// facilitates switching contexts by allowing the sentry to switch pointers to
// this struct as it needs to.
//
// The definition sits inside the LINT.IfChange/ThenChange(sysmsg.h) section of
// this file, so field order, types and sizes must be kept in sync with
// sysmsg.h.
type ThreadContext struct {
	// FPState is a region of memory where:
	//   - syshandler saves the FPU state using xsave/fxsave
	//   - sighandler copies the FPU state from ucontext->uc_mcontext.fpregs
	// Note that xsave requires this region of memory to be 64 byte aligned;
	// therefore allocations of ThreadContext must be too.
	FPState [MaxFPStateLen]byte
	// FPStateChanged is set to a non-zero value when the stub thread needs to
	// restore FPState because the sentry changed it. Init sets it to 1.
	FPStateChanged uint64
	// Regs is the context's GP register set. The {sig|sys}handler will save and
	// restore the user app's registers here.
	Regs linux.PtraceRegs

	// SignalInfo is the siginfo struct.
	SignalInfo linux.SignalInfo
	// Signo is the signal that the stub is requesting the sentry to handle.
	Signo int64
	// State indicates the reason why the context has exited back to the sentry.
	State ContextState
	// Interrupt is set to indicate that this context has been interrupted.
	Interrupt uint32
	// ThreadID is the ID of the sysmsg thread that's currently working on the
	// context.
	ThreadID uint32
	// LastThreadID is the ID of the previous sysmsg thread that ran the context
	// (not the one currently working on it). This field is used by sysmsg threads
	// to detect whether fpstate may have changed since the last time they ran a
	// context.
	LastThreadID uint32
	// SentryFastPath is used to indicate to the stub thread that the sentry
	// goroutine used for this thread context is busy-polling for a response
	// instead of using FUTEX_WAIT.
	SentryFastPath uint32
	// AckedTime is used by sysmsg threads to signal to the sentry that this context
	// has been picked up from the context queue and is actively being worked on.
	// The stub thread puts down the timestamp at which it has started processing
	// this context.
	AckedTime uint64
	// StateChangedTime is the time when the ThreadContext.State changed, as
	// recorded by the stub thread when it gave it back to the sentry
	// (the sentry does not populate this field except to reset it).
	StateChangedTime uint64
	// TLS is a pointer to a thread local storage.
	// It is only populated on ARM64.
	TLS uint64
	// Debug is a variable to use to get visibility into the stub from the sentry.
	Debug uint64
}

// StubError values represent known stub-thread failure modes.
// Since these errors originate from the stub threads, look at
// sysmsg.h:stub_error.
//
// Msg.ConvertSysmsgErr interprets Msg.Err as a StubError and turns it into a
// descriptive error.
type StubError int32

// Known stub errors. The values start at 0x0bad0000 and increase by one per
// constant, in declaration order.
const (
	// StubErrorBadSysmsg indicates sysmsg->self did not match sysmsg.
	StubErrorBadSysmsg StubError = 0x0bad0000 + iota
	// StubErrorBadThreadState indicates sysmsg->state was invalid.
	StubErrorBadThreadState
	// StubErrorBadSpinningQueueDecref indicates stubs removed more threads
	// from spinning queue than were put in.
	StubErrorBadSpinningQueueDecref
	// StubErrorArchPrctl indicates an error when calling arch_prctl.
	StubErrorArchPrctl
	// StubErrorFutex indicates an error when calling futex.
	StubErrorFutex
	// StubErrorBadContextID indicates a context received from the context
	// queue was of unexpected value.
	StubErrorBadContextID
	// StubErrorFpStateBadHeader indicates that the floating point state
	// header did not match the expected value.
	StubErrorFpStateBadHeader
)

// LINT.ThenChange(sysmsg.h)

// Init initializes the message for the sysmsg thread identified by threadID.
//
// It clears Err, ErrAdditional and Context, sets Line to -1 and stores
// threadID in ThreadID. All other fields of m are left untouched.
func (m *Msg) Init(threadID uint32) {
	m.Err = 0
	m.ErrAdditional = 0
	m.Line = -1
	m.ThreadID = threadID
	m.Context = 0
}

// Init initializes the ThreadContext instance.
//
// It sets FPStateChanged to 1, zeroes Regs, Signo and SignalInfo, sets State
// to ContextStateNone and stores initialThreadID in ThreadID. All other fields
// of c (including FPState) are left untouched.
func (c *ThreadContext) Init(initialThreadID uint32) {
	c.FPStateChanged = 1
	c.Regs = linux.PtraceRegs{}
	c.Signo = 0
	c.SignalInfo = linux.SignalInfo{}
	c.State = ContextStateNone
	c.ThreadID = initialThreadID
}

// ConvertSysmsgErr converts m.Err to platform.ContextError.
//
// The returned error always has Errno set to unix.EPERM. Its Err field holds a
// description selected by the StubError code stored in m.Err; the description
// ends with the line recorded in m.Line and a dump of m (see String). For
// codes that carry extra detail, m.ErrAdditional is included as well. Codes
// that are not a known StubError produce an "unknown reason" message.
//
// m.Err is loaded exactly once, so the code printed in the "unknown reason"
// message is always the value that was dispatched on, even if the field is
// modified between the two uses (the atomic loads suggest that m may be
// written concurrently by the stub thread).
func (m *Msg) ConvertSysmsgErr() *platform.ContextError {
	err := &platform.ContextError{
		Errno: unix.EPERM,
	}

	const prefix = "systrap stub thread failure:"
	suffix := fmt.Sprintf("(failed on line %d; %s)", atomic.LoadInt32(&m.Line), m.String())
	// Snapshot the error code once; re-reading it in the default branch could
	// report a value different from the one the switch was evaluated on.
	code := atomic.LoadInt32(&m.Err)
	switch StubError(code) {
	case StubErrorBadSysmsg:
		err.Err = fmt.Errorf("%s sysmsg->self did not match sysmsg during sig/sys-handler %s", prefix, suffix)
	case StubErrorBadThreadState:
		err.Err = fmt.Errorf("%s sysmsg->state was invalid during sys-handler %s", prefix, suffix)
	case StubErrorBadSpinningQueueDecref:
		err.Err = fmt.Errorf("%s imbalanced use of spinning queue %s", prefix, suffix)
	case StubErrorArchPrctl:
		err.Err = fmt.Errorf("%s arch_prctl error=0x%x %s", prefix, atomic.LoadInt32(&m.ErrAdditional), suffix)
	case StubErrorFutex:
		err.Err = fmt.Errorf("%s futex error=0x%x %s", prefix, atomic.LoadInt32(&m.ErrAdditional), suffix)
	case StubErrorBadContextID:
		err.Err = fmt.Errorf("%s unexpected context ID (%d) from context queue %s", prefix, atomic.LoadInt32(&m.ErrAdditional), suffix)
	case StubErrorFpStateBadHeader:
		err.Err = fmt.Errorf("%s FP state context magic header (%d) does not match expected FPSIMD_MAGIC %s", prefix, atomic.LoadInt32(&m.ErrAdditional), suffix)
	default:
		err.Err = fmt.Errorf("%s unknown reason (0x%x) (possible shared memory corruption) %s", prefix, code, suffix)
	}

	return err
}

// String returns a one-line, human-readable dump of m for debugging. It
// includes Self, State, Err, Line, Debug, AppStack, Context and ThreadID.
func (m *Msg) String() string {
	// Fragments are formatted in the order in which they appear in the output
	// and concatenated without a separator.
	fragments := []string{
		fmt.Sprintf("sysmsg.Msg{msg: %x state %d", m.Self, m.State),
		fmt.Sprintf(" err %x line %d debug %x", m.Err, m.Line, m.Debug),
		fmt.Sprintf(" app stack %x", m.AppStack),
		fmt.Sprintf(" context %x", m.Context),
		fmt.Sprintf(" ThreadID %d", m.ThreadID),
		"}",
	}
	return strings.Join(fragments, "")
}

// String returns a one-line, human-readable dump of c for debugging. State
// and Debug are read with atomic loads; the remaining fields are read
// directly. FPState is not included.
func (c *ThreadContext) String() string {
	// Fragments are formatted in the order in which they appear in the output
	// and concatenated without a separator.
	fragments := []string{
		fmt.Sprintf("sysmsg.ThreadContext{state %d", c.State.Get()),
		fmt.Sprintf(" fault addr %x syscall %d", c.SignalInfo.Addr(), c.SignalInfo.Syscall()),
		fmt.Sprintf(" ip %x sp %x", c.Regs.InstructionPointer(), c.Regs.StackPointer()),
		fmt.Sprintf(" FPStateChanged %d Regs %+v", c.FPStateChanged, c.Regs),
		fmt.Sprintf(" Interrupt %d", c.Interrupt),
		fmt.Sprintf(" ThreadID %d LastThreadID %d", c.ThreadID, c.LastThreadID),
		fmt.Sprintf(" SentryFastPath %d Acked %d", c.SentryFastPath, c.AckedTime),
		fmt.Sprintf(" signo: %d, siginfo: %+v", c.Signo, c.SignalInfo),
		fmt.Sprintf(" debug %d", atomic.LoadUint64(&c.Debug)),
		"}",
	}
	return strings.Join(fragments, "")
}