// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "funcdata.h"
#include "textflag.h"
// CPU offsets.
#define CPU_REGISTERS 64
#define CPU_FPU_STATE 280
#define CPU_ERROR_CODE (16+0)
#define CPU_ERROR_TYPE (16+8)
#define CPU_VECTOR (16+16)
#define CPU_FAULT_ADDR (16+24)
#define CPU_ENTRY (16+32)
#define CPU_HAS_XSAVE (16+40)
#define CPU_HAS_XSAVEOPT (16+41)
#define ENTRY_SCRATCH0 256
#define ENTRY_STACK_TOP 264
#define ENTRY_CPU_SELF 272
#define ENTRY_KERNEL_CR3 280
// Bits.
#define _RFLAGS_IF 512
#define _RFLAGS_IOPL0 4096
#define _KERNEL_FLAGS 2
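// For reference, these values correspond to standard RFLAGS bit positions:
// _RFLAGS_IF is bit 9 (interrupt enable, 1<<9 = 512), _RFLAGS_IOPL0 is
// bit 12 (the low bit of the two-bit IOPL field, 1<<12 = 4096), and
// _KERNEL_FLAGS sets only the reserved bit 1, which is always one.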
// Vectors.
#define DivideByZero 0
#define Debug 1
#define NMI 2
#define Breakpoint 3
#define Overflow 4
#define BoundRangeExceeded 5
#define InvalidOpcode 6
#define DeviceNotAvailable 7
#define DoubleFault 8
#define CoprocessorSegmentOverrun 9
#define InvalidTSS 10
#define SegmentNotPresent 11
#define StackSegmentFault 12
#define GeneralProtectionFault 13
#define PageFault 14
#define X87FloatingPointException 16
#define AlignmentCheck 17
#define MachineCheck 18
#define SIMDFloatingPointException 19
#define VirtualizationException 20
#define SecurityException 30
#define SyscallInt80 128
#define Syscall 256
#define PTRACE_R15 0
#define PTRACE_R14 8
#define PTRACE_R13 16
#define PTRACE_R12 24
#define PTRACE_RBP 32
#define PTRACE_RBX 40
#define PTRACE_R11 48
#define PTRACE_R10 56
#define PTRACE_R9 64
#define PTRACE_R8 72
#define PTRACE_RAX 80
#define PTRACE_RCX 88
#define PTRACE_RDX 96
#define PTRACE_RSI 104
#define PTRACE_RDI 112
#define PTRACE_ORIGRAX 120
#define PTRACE_RIP 128
#define PTRACE_CS 136
#define PTRACE_FLAGS 144
#define PTRACE_RSP 152
#define PTRACE_SS 160
#define PTRACE_FS_BASE 168
#define PTRACE_GS_BASE 176
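// N.B. These PTRACE_* offsets appear to follow the field order of the
// Linux amd64 user_regs_struct (r15 through gs_base); they are assumed
// here to match the Go-side register structure that REGISTERS_SAVE and
// REGISTERS_LOAD below operate on.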
// Saves a register set.
//
// This is a macro because it may need to be executed in contexts where a
// stack is not available for calls.
//
// The following registers are not saved: AX, SP, IP, FLAGS, all segments.
#define REGISTERS_SAVE(reg, offset) \
MOVQ R15, offset+PTRACE_R15(reg); \
MOVQ R14, offset+PTRACE_R14(reg); \
MOVQ R13, offset+PTRACE_R13(reg); \
MOVQ R12, offset+PTRACE_R12(reg); \
MOVQ BP, offset+PTRACE_RBP(reg); \
MOVQ BX, offset+PTRACE_RBX(reg); \
MOVQ CX, offset+PTRACE_RCX(reg); \
MOVQ DX, offset+PTRACE_RDX(reg); \
MOVQ R11, offset+PTRACE_R11(reg); \
MOVQ R10, offset+PTRACE_R10(reg); \
MOVQ R9, offset+PTRACE_R9(reg); \
MOVQ R8, offset+PTRACE_R8(reg); \
MOVQ SI, offset+PTRACE_RSI(reg); \
MOVQ DI, offset+PTRACE_RDI(reg);
// Loads a register set.
//
// This is a macro because it may need to be executed in contexts where a
// stack is not available for calls.
//
// The following registers are not loaded: AX, SP, IP, FLAGS, all segments.
#define REGISTERS_LOAD(reg, offset) \
MOVQ offset+PTRACE_R15(reg), R15; \
MOVQ offset+PTRACE_R14(reg), R14; \
MOVQ offset+PTRACE_R13(reg), R13; \
MOVQ offset+PTRACE_R12(reg), R12; \
MOVQ offset+PTRACE_RBP(reg), BP; \
MOVQ offset+PTRACE_RBX(reg), BX; \
MOVQ offset+PTRACE_RCX(reg), CX; \
MOVQ offset+PTRACE_RDX(reg), DX; \
MOVQ offset+PTRACE_R11(reg), R11; \
MOVQ offset+PTRACE_R10(reg), R10; \
MOVQ offset+PTRACE_R9(reg), R9; \
MOVQ offset+PTRACE_R8(reg), R8; \
MOVQ offset+PTRACE_RSI(reg), SI; \
MOVQ offset+PTRACE_RDI(reg), DI;
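// For illustration, an invocation such as REGISTERS_SAVE(AX, 0) expands
// to a flat run of stores of the form:
//
// MOVQ R15, 0+PTRACE_R15(AX)
// MOVQ R14, 0+PTRACE_R14(AX)
// ...
// MOVQ DI, 0+PTRACE_RDI(AX)
//
// REGISTERS_LOAD is the mirror image, loading each register from the same
// offsets.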
// WRITE_CR3() writes the given CR3 value.
//
// The code corresponds to:
//
// mov %rax, %cr3
//
#define WRITE_CR3() \
BYTE $0x0f; BYTE $0x22; BYTE $0xd8;
// SWAP_GS swaps the kernel GS (CPU).
#define SWAP_GS() \
BYTE $0x0F; BYTE $0x01; BYTE $0xf8;
// IRET returns from an interrupt frame.
#define IRET() \
BYTE $0x48; BYTE $0xcf;
// SYSRET64 executes the sysret instruction.
#define SYSRET64() \
BYTE $0x48; BYTE $0x0f; BYTE $0x07;
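// The four macros above are emitted as raw opcode bytes, presumably
// because the Go assembler does not accept these privileged instructions
// directly. In AT&T syntax they correspond to:
//
// WRITE_CR3: mov %rax, %cr3
// SWAP_GS:   swapgs
// IRET:      iretq
// SYSRET64:  sysretq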
// LOAD_KERNEL_STACK loads the kernel stack.
#define LOAD_KERNEL_STACK(entry) \
MOVQ ENTRY_STACK_TOP(entry), SP;
// ADDR_OF_FUNC defines a function named 'name' that returns the address of
// 'symbol'.
#define ADDR_OF_FUNC(name, symbol) \
TEXT name,$0-8; \
MOVQ $symbol, AX; \
MOVQ AX, ret+0(FP); \
RET
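// For example, ADDR_OF_FUNC(·AddrOfStart(SB), ·start(SB)) below defines a
// tiny function that loads $·start(SB) into AX and returns it, which is
// how Go code (presumably via a declaration along the lines of
// "func AddrOfStart() uintptr") obtains the raw address of an assembly
// entrypoint.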
// See kernel.go.
TEXT ·Halt(SB),NOSPLIT,$0
HLT
RET
// See kernel_amd64.go.
TEXT ·HaltAndWriteFSBase(SB),NOSPLIT,$8-8
HLT
// Restore FS_BASE.
MOVQ regs+0(FP), AX
MOVQ PTRACE_FS_BASE(AX), AX
PUSHQ AX // First argument (FS_BASE)
CALL ·writeFS(SB)
POPQ AX
RET
// See entry_amd64.go.
TEXT ·swapgs(SB),NOSPLIT,$0
SWAP_GS()
RET
// jumpToKernel changes execution to the kernel address space.
//
// This works by changing the return value to the kernel version.
TEXT ·jumpToKernel(SB),NOSPLIT,$0
MOVQ 0(SP), AX
ORQ ·KernelStartAddress(SB), AX // Future return value.
MOVQ AX, 0(SP)
RET
// jumpToUser changes execution to the user address space.
//
// This works by changing the return value to the user version.
TEXT ·jumpToUser(SB),NOSPLIT,$0
// N.B. we can't access KernelStartAddress from the upper half (data
// pages not available), so just naively clear all the upper bits.
// We are assuming a 47-bit virtual address space.
MOVQ $0x00007fffffffffff, AX
MOVQ 0(SP), BX
ANDQ BX, AX // Future return value.
MOVQ AX, 0(SP)
RET
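// As a rough sketch of the aliasing performed by the two functions above
// (assuming a 47-bit user address space, with KernelStartAddress holding
// the complementary upper-half bits): jumpToKernel ORs those bits into the
// saved return address so that the subsequent RET lands on the upper-half
// (kernel) alias of the same code, while jumpToUser masks the return
// address with $0x00007fffffffffff to land back on the lower-half (user)
// alias.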
// See kernel_amd64.go.
//
// The 16-byte frame size is for the saved values of MXCSR and the x87 control
// word.
TEXT ·doSwitchToUser(SB),NOSPLIT,$16-48
// We are passed pointers to heap objects, but do not store them in our
// local frame.
NO_LOCAL_POINTERS
// MXCSR and the x87 control word are the only floating point state
// that is callee-save, so they must be saved here.
STMXCSR mxcsr-0(SP)
FSTCW cw-8(SP)
// Restore application floating point state.
MOVQ cpu+0(FP), SI
MOVQ fpState+16(FP), DI
MOVB ·hasXSAVE(SB), BX
TESTB BX, BX
JZ no_xrstor
// Use xrstor to restore all available fp state. For now, we restore
// everything unconditionally by setting the implicit operand edx:eax
// (the "requested feature bitmap") to all 1's.
MOVL $0xffffffff, AX
MOVL $0xffffffff, DX
BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x2f // XRSTOR64 0(DI)
JMP fprestore_done
no_xrstor:
// Fall back to fxrstor if xsave is not available.
FXRSTOR64 0(DI)
fprestore_done:
// Set application GS.
MOVQ regs+8(FP), R8
SWAP_GS()
MOVQ PTRACE_GS_BASE(R8), AX
PUSHQ AX
CALL ·writeGS(SB)
POPQ AX
// Call sysret() or iret().
MOVQ userCR3+24(FP), CX
MOVQ needIRET+32(FP), R9
ADDQ $-32, SP
MOVQ SI, 0(SP) // cpu
MOVQ R8, 8(SP) // regs
MOVQ CX, 16(SP) // userCR3
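// N.B. This manual frame mirrors the $0-32 argument layout of ·sysret
// and ·iret below (cpu, regs, userCR3), with the fourth slot receiving
// the vector return value read back at 24(SP) after the call.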
TESTQ R9, R9
JNZ do_iret
CALL ·sysret(SB)
JMP done_sysret_or_iret
do_iret:
CALL ·iret(SB)
done_sysret_or_iret:
MOVQ 24(SP), AX // vector
ADDQ $32, SP
MOVQ AX, vector+40(FP)
// Save application floating point state.
MOVQ fpState+16(FP), DI
MOVB ·hasXSAVE(SB), BX
MOVB ·hasXSAVEOPT(SB), CX
TESTB BX, BX
JZ no_xsave
// Use xsave/xsaveopt to save all extended state.
// We save everything unconditionally by setting RFBM to all 1's.
MOVL $0xffffffff, AX
MOVL $0xffffffff, DX
TESTB CX, CX
JZ no_xsaveopt
BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
JMP fpsave_done
no_xsaveopt:
BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
JMP fpsave_done
no_xsave:
FXSAVE64 0(DI)
fpsave_done:
// Restore MXCSR and the x87 control word after one of the two floating
// point save cases above, to ensure the application versions are saved
// before being clobbered here.
LDMXCSR mxcsr-0(SP)
// FLDCW is a "waiting" x87 instruction, meaning it checks for pending
// unmasked exceptions before executing. Thus if userspace has unmasked
// an exception and has one pending, it can be raised by FLDCW even
// though the new control word will mask exceptions. To prevent this,
// we must first clear pending exceptions (which will be restored by
// XRSTOR, et al).
BYTE $0xDB; BYTE $0xE2; // FNCLEX
FLDCW cw-8(SP)
RET
// See entry_amd64.go.
TEXT ·sysret(SB),NOSPLIT,$0-32
// Set application FS. We can't do this in Go because Go code needs FS.
MOVQ regs+8(FP), AX
MOVQ PTRACE_FS_BASE(AX), AX
PUSHQ AX
CALL ·writeFS(SB)
POPQ AX
CALL ·jumpToKernel(SB)
// Save original state and stack. sysenter() or exception(), entered
// from the application (guest ring 3), will switch to this stack, set
// the return value (vector: 32(SP)) and then do RET, which will also
// automatically return to the lower half.
MOVQ cpu+0(FP), BX
MOVQ regs+8(FP), AX
MOVQ userCR3+16(FP), CX
MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)
// Save SP, AX and userCR3 on the kernel stack.
MOVQ CPU_ENTRY(BX), BX
LOAD_KERNEL_STACK(BX)
PUSHQ PTRACE_RSP(AX)
PUSHQ PTRACE_RAX(AX)
PUSHQ CX
// Restore user register state.
REGISTERS_LOAD(AX, 0)
MOVQ PTRACE_RIP(AX), CX // Needed for SYSRET.
MOVQ PTRACE_FLAGS(AX), R11 // Needed for SYSRET.
// Restore userCR3, AX and SP.
POPQ AX // Get userCR3.
WRITE_CR3() // Switch to userCR3.
POPQ AX // Restore AX.
POPQ SP // Restore SP.
SYSRET64()
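// N.B. SYSRET64 resumes user execution by loading RIP from RCX and
// RFLAGS from R11, which is why CX and R11 were overwritten above with
// the saved RIP and FLAGS values.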
// sysenter or exception will write our return value and return to our
// caller.
// See entry_amd64.go.
TEXT ·iret(SB),NOSPLIT,$0-32
// Set application FS. We can't do this in Go because Go code needs FS.
MOVQ regs+8(FP), AX
MOVQ PTRACE_FS_BASE(AX), AX
PUSHQ AX // First argument (FS_BASE)
CALL ·writeFS(SB)
POPQ AX
CALL ·jumpToKernel(SB)
// Save original state and stack. sysenter() or exception(), entered
// from the application (guest ring 3), will switch to this stack, set
// the return value (vector: 32(SP)) and then do RET, which will also
// automatically return to the lower half.
MOVQ cpu+0(FP), BX
MOVQ regs+8(FP), AX
MOVQ userCR3+16(FP), CX
MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)
// Build an IRET frame & restore state.
MOVQ CPU_ENTRY(BX), BX
LOAD_KERNEL_STACK(BX)
PUSHQ PTRACE_SS(AX)
PUSHQ PTRACE_RSP(AX)
PUSHQ PTRACE_FLAGS(AX)
PUSHQ PTRACE_CS(AX)
PUSHQ PTRACE_RIP(AX)
PUSHQ PTRACE_RAX(AX) // Save AX on kernel stack.
PUSHQ CX // Save userCR3 on kernel stack.
REGISTERS_LOAD(AX, 0) // Restore most registers.
POPQ AX // Get userCR3.
WRITE_CR3() // Switch to userCR3.
POPQ AX // Restore AX.
IRET()
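// N.B. IRETQ pops RIP, CS, RFLAGS, RSP and SS from the frame built
// above, restoring the full user return context in a single instruction.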
// sysenter or exception will write our return value and return to our
// caller.
// See entry_amd64.go.
TEXT ·resume(SB),NOSPLIT,$0
// See iret, above.
MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
PUSHQ CPU_REGISTERS+PTRACE_SS(AX)
PUSHQ CPU_REGISTERS+PTRACE_RSP(AX)
PUSHQ CPU_REGISTERS+PTRACE_FLAGS(AX)
PUSHQ CPU_REGISTERS+PTRACE_CS(AX)
PUSHQ CPU_REGISTERS+PTRACE_RIP(AX)
REGISTERS_LOAD(AX, CPU_REGISTERS)
MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX
IRET()
// See entry_amd64.go.
TEXT ·start(SB),NOSPLIT,$0
// N.B. This is the vCPU entrypoint. It is not called from Go code and
// thus pushes and pops values on the stack until calling into Go
// (startGo), because we do not have a typical Go assembly frame here.
PUSHQ $0x0 // Previous frame pointer.
MOVQ SP, BP // Set frame pointer.
PUSHQ AX // Save CPU.
// Set up environment required by Go before calling startGo: Go needs
// FS_BASE and floating point initialized.
MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX
PUSHQ BX // First argument (FS_BASE)
CALL ·writeFS(SB)
POPQ BX
// First argument (CPU) already at bottom of stack.
CALL ·startGo(SB) // Call Go hook.
JMP ·resume(SB) // Restore to registers.
ADDR_OF_FUNC(·AddrOfStart(SB), ·start(SB));
// See entry_amd64.go.
TEXT ·sysenter(SB),NOSPLIT,$0
// _RFLAGS_IOPL0 is always set in user mode and never set in kernel
// mode. See the comment on UserFlagsSet for more details.
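// N.B. On SYSCALL the processor places the user RIP in RCX and the user
// RFLAGS in R11, so the test below inspects the application's saved
// flags; the same two registers are stored as RIP/FLAGS further down.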
TESTL $_RFLAGS_IOPL0, R11
JZ kernel
user:
SWAP_GS()
MOVQ AX, ENTRY_SCRATCH0(GS) // Save user AX on scratch.
MOVQ ENTRY_KERNEL_CR3(GS), AX // Get kernel cr3 on AX.
WRITE_CR3() // Switch to kernel cr3.
MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX // Get user regs.
REGISTERS_SAVE(AX, 0) // Save all except IP, FLAGS, SP, AX.
MOVQ CX, PTRACE_RIP(AX)
MOVQ R11, PTRACE_FLAGS(AX)
MOVQ SP, PTRACE_RSP(AX)
MOVQ ENTRY_SCRATCH0(GS), CX // Load saved user AX value.
MOVQ CX, PTRACE_RAX(AX) // Save user AX.
MOVQ CX, PTRACE_ORIGRAX(AX)
MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
MOVQ CPU_REGISTERS+PTRACE_RSP(AX), SP // Get stacks.
MOVQ $0, CPU_ERROR_CODE(AX) // Clear error code.
MOVQ $1, CPU_ERROR_TYPE(AX) // Set error type to user.
CALL ·jumpToUser(SB)
// Restore kernel FS_BASE.
MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX
PUSHQ BX // First argument (FS_BASE)
CALL ·writeFS(SB)
POPQ BX
MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
// Return to the kernel, where the frame is:
//
// vector (sp+32)
// userCR3 (sp+24)
// regs (sp+16)
// cpu (sp+8)
// vcpu.Switch (sp+0)
//
MOVQ CPU_REGISTERS+PTRACE_RBP(AX), BP // Original base pointer.
MOVQ $Syscall, 32(SP) // Output vector.
RET
kernel:
// We can't restore the original stack, but we can access the registers
// in the CPU state directly. No need for temporary juggling.
MOVQ AX, ENTRY_SCRATCH0(GS)
MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
REGISTERS_SAVE(AX, CPU_REGISTERS)
MOVQ CX, CPU_REGISTERS+PTRACE_RIP(AX)
MOVQ R11, CPU_REGISTERS+PTRACE_FLAGS(AX)
MOVQ SP, CPU_REGISTERS+PTRACE_RSP(AX)
MOVQ ENTRY_SCRATCH0(GS), BX
MOVQ BX, CPU_REGISTERS+PTRACE_ORIGRAX(AX)
MOVQ BX, CPU_REGISTERS+PTRACE_RAX(AX)
MOVQ $0, CPU_ERROR_CODE(AX) // Clear error code.
MOVQ $0, CPU_ERROR_TYPE(AX) // Set error type to kernel.
MOVQ $0xffffffffffffffff, CPU_VECTOR(AX) // Set vector to an invalid value.
// Save floating point state. CPU.floatingPointState is a slice, so the
// first word of CPU.floatingPointState is a pointer to the destination
// array.
MOVQ CPU_FPU_STATE(AX), DI
MOVB CPU_HAS_XSAVE(AX), BX
MOVB CPU_HAS_XSAVEOPT(AX), CX
TESTB BX, BX
JZ no_xsave
// Use xsave/xsaveopt to save all extended state.
// We save everything unconditionally by setting RFBM to all 1's.
MOVL $0xffffffff, AX
MOVL $0xffffffff, DX
TESTB CX, CX
JZ no_xsaveopt
BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
JMP fpsave_done
no_xsaveopt:
BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
JMP fpsave_done
no_xsave:
FXSAVE64 0(DI)
fpsave_done:
// Call the syscall trampoline.
LOAD_KERNEL_STACK(GS)
MOVQ ENTRY_CPU_SELF(GS), AX // AX contains the vCPU.
PUSHQ AX // First argument (vCPU).
CALL ·kernelSyscall(SB) // Call the trampoline.
POPQ AX // Pop vCPU.
// We only trigger a bluepill entry in the bluepill function, and can
// therefore be guaranteed that there is no floating point state to be
// loaded on resuming from halt.
JMP ·resume(SB)
ADDR_OF_FUNC(·addrOfSysenter(SB), ·sysenter(SB));
// exception is a generic exception handler.
//
// There are two cases handled:
//
// 1) An exception in kernel mode: this results in saving the state at the time
// of the exception and calling the defined hook.
//
// 2) An exception in guest mode: the original kernel frame is restored, and
// the vector & error codes are pushed as return values.
//
// See below for the stubs that call exception.
TEXT ·exception(SB),NOSPLIT,$0
// Determine whether the exception occurred in kernel mode or user
// mode, based on the flags. We expect the following stack:
//
// SS (sp+48)
// SP (sp+40)
// FLAGS (sp+32)
// CS (sp+24)
// IP (sp+16)
// ERROR_CODE (sp+8)
// VECTOR (sp+0)
//
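// The SS through IP slots are pushed by the processor when the exception
// is delivered; VECTOR and ERROR_CODE are pushed by the per-vector stubs
// defined at the bottom of this file (the processor supplies the real
// error code for the EXCEPTION_WITH_ERROR vectors, and the stubs push a
// zero placeholder for the rest).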
TESTL $_RFLAGS_IOPL0, 32(SP)
JZ kernel
user:
SWAP_GS()
ADDQ $-8, SP // Adjust for flags.
MOVQ $_KERNEL_FLAGS, 0(SP); BYTE $0x9d; // Reset flags (POPFQ).
PUSHQ AX // Save user AX on stack.
MOVQ ENTRY_KERNEL_CR3(GS), AX // Get kernel cr3 on AX.
WRITE_CR3() // Switch to kernel cr3.
MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX // Get user regs.
REGISTERS_SAVE(AX, 0) // Save all except IP, FLAGS, SP, AX.
POPQ BX // Restore original AX.
MOVQ BX, PTRACE_RAX(AX) // Save it.
MOVQ BX, PTRACE_ORIGRAX(AX)
MOVQ 16(SP), BX; MOVQ BX, PTRACE_RIP(AX)
MOVQ 24(SP), CX; MOVQ CX, PTRACE_CS(AX)
MOVQ 32(SP), DX; MOVQ DX, PTRACE_FLAGS(AX)
MOVQ 40(SP), DI; MOVQ DI, PTRACE_RSP(AX)
MOVQ 48(SP), SI; MOVQ SI, PTRACE_SS(AX)
CALL ·jumpToUser(SB)
// Restore kernel FS_BASE.
MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX
PUSHQ BX // First argument (FS_BASE)
CALL ·writeFS(SB)
POPQ BX
// Copy out and return.
MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
MOVQ 0(SP), BX // Load vector.
MOVQ 8(SP), CX // Load error code.
MOVQ CPU_REGISTERS+PTRACE_RSP(AX), SP // Original stack (kernel version).
MOVQ CPU_REGISTERS+PTRACE_RBP(AX), BP // Original base pointer.
MOVQ CX, CPU_ERROR_CODE(AX) // Set error code.
MOVQ $1, CPU_ERROR_TYPE(AX) // Set error type to user.
MOVQ BX, 32(SP) // Output vector.
RET
kernel:
// As per above, we can save directly.
PUSHQ AX
MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
REGISTERS_SAVE(AX, CPU_REGISTERS)
POPQ BX
MOVQ BX, CPU_REGISTERS+PTRACE_RAX(AX)
MOVQ BX, CPU_REGISTERS+PTRACE_ORIGRAX(AX)
MOVQ 16(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_RIP(AX)
MOVQ 32(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_FLAGS(AX)
MOVQ 40(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_RSP(AX)
// Set the error code and adjust the stack.
MOVQ 8(SP), BX // Load the error code.
MOVQ BX, CPU_ERROR_CODE(AX) // Copy out to the CPU.
MOVQ 0(SP), BX // Load the vector.
MOVQ BX, CPU_VECTOR(AX) // Copy out to the CPU.
BYTE $0x0f; BYTE $0x20; BYTE $0xd3; // MOV CR2, RBX
MOVQ BX, CPU_FAULT_ADDR(AX)
MOVQ $0, CPU_ERROR_TYPE(AX) // Set error type to kernel.
// Save floating point state. CPU.floatingPointState is a slice, so the
// first word of CPU.floatingPointState is a pointer to the destination
// array.
MOVQ CPU_FPU_STATE(AX), DI
MOVB CPU_HAS_XSAVE(AX), BX
MOVB CPU_HAS_XSAVEOPT(AX), CX
TESTB BX, BX
JZ no_xsave
// Use xsave/xsaveopt to save all extended state.
// We save everything unconditionally by setting RFBM to all 1's.
MOVL $0xffffffff, AX
MOVL $0xffffffff, DX
TESTB CX, CX
JZ no_xsaveopt
BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
JMP fpsave_done
no_xsaveopt:
BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
JMP fpsave_done
no_xsave:
FXSAVE64 0(DI)
fpsave_done:
// Call the exception trampoline.
MOVQ 0(SP), BX // BX contains the vector.
LOAD_KERNEL_STACK(GS)
MOVQ ENTRY_CPU_SELF(GS), AX // AX contains the vCPU.
PUSHQ BX // Second argument (vector).
PUSHQ AX // First argument (vCPU).
CALL ·kernelException(SB) // Call the trampoline.
POPQ BX // Pop vector.
POPQ AX // Pop vCPU.
// We only trigger a bluepill entry in the bluepill function, and can
// therefore be guaranteed that there is no floating point state to be
// loaded on resuming from halt.
JMP ·resume(SB)
#define EXCEPTION_WITH_ERROR(value, symbol, addr) \
ADDR_OF_FUNC(addr, symbol); \
TEXT symbol,NOSPLIT,$0; \
PUSHQ $value; \
JMP ·exception(SB);
#define EXCEPTION_WITHOUT_ERROR(value, symbol, addr) \
ADDR_OF_FUNC(addr, symbol); \
TEXT symbol,NOSPLIT,$0; \
PUSHQ $0x0; \
PUSHQ $value; \
JMP ·exception(SB);
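// For illustration, EXCEPTION_WITHOUT_ERROR(DivideByZero, ·divideByZero(SB),
// ·addrOfDivideByZero(SB)) expands to an ·addrOfDivideByZero helper
// returning the stub's address, plus a ·divideByZero stub that pushes a
// zero error code and the vector before jumping to the common ·exception
// handler. The WITH_ERROR variant omits the zero push because the
// processor has already pushed a real error code.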
EXCEPTION_WITHOUT_ERROR(DivideByZero, ·divideByZero(SB), ·addrOfDivideByZero(SB))
EXCEPTION_WITHOUT_ERROR(Debug, ·debug(SB), ·addrOfDebug(SB))
EXCEPTION_WITHOUT_ERROR(NMI, ·nmi(SB), ·addrOfNMI(SB))
EXCEPTION_WITHOUT_ERROR(Breakpoint, ·breakpoint(SB), ·addrOfBreakpoint(SB))
EXCEPTION_WITHOUT_ERROR(Overflow, ·overflow(SB), ·addrOfOverflow(SB))
EXCEPTION_WITHOUT_ERROR(BoundRangeExceeded, ·boundRangeExceeded(SB), ·addrOfBoundRangeExceeded(SB))
EXCEPTION_WITHOUT_ERROR(InvalidOpcode, ·invalidOpcode(SB), ·addrOfInvalidOpcode(SB))
EXCEPTION_WITHOUT_ERROR(DeviceNotAvailable, ·deviceNotAvailable(SB), ·addrOfDeviceNotAvailable(SB))
EXCEPTION_WITH_ERROR(DoubleFault, ·doubleFault(SB), ·addrOfDoubleFault(SB))
EXCEPTION_WITHOUT_ERROR(CoprocessorSegmentOverrun, ·coprocessorSegmentOverrun(SB), ·addrOfCoprocessorSegmentOverrun(SB))
EXCEPTION_WITH_ERROR(InvalidTSS, ·invalidTSS(SB), ·addrOfInvalidTSS(SB))
EXCEPTION_WITH_ERROR(SegmentNotPresent, ·segmentNotPresent(SB), ·addrOfSegmentNotPresent(SB))
EXCEPTION_WITH_ERROR(StackSegmentFault, ·stackSegmentFault(SB), ·addrOfStackSegmentFault(SB))
EXCEPTION_WITH_ERROR(GeneralProtectionFault, ·generalProtectionFault(SB), ·addrOfGeneralProtectionFault(SB))
EXCEPTION_WITH_ERROR(PageFault, ·pageFault(SB), ·addrOfPageFault(SB))
EXCEPTION_WITHOUT_ERROR(X87FloatingPointException, ·x87FloatingPointException(SB), ·addrOfX87FloatingPointException(SB))
EXCEPTION_WITH_ERROR(AlignmentCheck, ·alignmentCheck(SB), ·addrOfAlignmentCheck(SB))
EXCEPTION_WITHOUT_ERROR(MachineCheck, ·machineCheck(SB), ·addrOfMachineCheck(SB))
EXCEPTION_WITHOUT_ERROR(SIMDFloatingPointException, ·simdFloatingPointException(SB), ·addrOfSimdFloatingPointException(SB))
EXCEPTION_WITHOUT_ERROR(VirtualizationException, ·virtualizationException(SB), ·addrOfVirtualizationException(SB))
EXCEPTION_WITH_ERROR(SecurityException, ·securityException(SB), ·addrOfSecurityException(SB))
EXCEPTION_WITHOUT_ERROR(SyscallInt80, ·syscallInt80(SB), ·addrOfSyscallInt80(SB))