;; x64 assembly (MASM syntax, Microsoft x64 calling convention).
doSwitch PROTO
doEndDetour PROTO
doEndDetour2 PROTO
doStartThread PROTO
PUBLIC doSwitchReturnLoc
.code
;; Parameters:
;; rcx, rdx, r8, r9, xmm0-xmm3
;; Needs preservation:
;; rbx, rbp, rdi, rsi, rsp, r12, r13, r14, r15
;; xmm6 - xmm15
;; doSwitch: store the running context on the current stack, publish where it was stored
;; through 'oldDesc', and resume the context whose location is read from 'newDesc'.
;;
;; In:
;;   rcx = "newDesc": address of a slot containing a pointer to the 3-word stack description
;;         of the context to resume. The slot is overwritten with 0 during the switch.
;;   rdx = "oldDesc": address of a slot that receives a pointer to the stack description
;;         created below for the current context.
;; Clobbers: rax. All callee-saved registers are reloaded from the frame found on the new stack.
;;
;; Frame layout after the prolog (offsets from rsp). rsp is 16-byte aligned here provided it
;; was 8 mod 16 at entry: 8 pushes + 0E8h = 128h bytes.
;;   00h-1Fh  shadow space
;;   20h      stack description, word 0: the value of rsp itself
;;   28h      stack description, word 1: gs:[10h]
;;   30h      stack description, word 2: gs:[8h]
;;   38h      unused (padding)
;;   40h-DFh  xmm15 (lowest address) up to xmm6 (highest address), 16 bytes each
;;   E0h      unused (padding)
;;   E8h-     r15, r14, r13, r12, rsi, rdi, rbx, rbp, then the return address
;; Everything from doSwitchReturnLoc onward reads these same offsets from the *new* stack, so
;; the context being resumed must have a frame with exactly this layout.
doSwitch proc frame
;; Prolog: push every callee-saved general purpose register, each one described to the
;; unwinder with .pushreg.
push rbp
.pushreg rbp
;; rbp is pointed at the saved rbp but is not declared with .setframe, so the unwind data
;; stays rsp-based. rbp is not referenced again in this procedure.
;; NOTE(review): presumably kept to maintain a conventional rbp chain - confirm.
mov rbp, rsp
push rbx
.pushreg rbx
push rdi
.pushreg rdi
push rsi
.pushreg rsi
push r12
.pushreg r12
push r13
.pushreg r13
push r14
.pushreg r14
push r15
.pushreg r15
;; Reserve the rest of the frame. We need space for:
;; - 10 xmm registers (16 bytes each)
;; - 3 words for the stack description
;; - 2 padding words: 8 words were pushed above, so one word is wasted above the xmm area
;;   (to keep it 16-byte aligned) and one above the stack description.
;; - shadow space at the bottom, so that any detours do not trash important data on the stack.
sub rsp, 0E8h
.allocstack 0E8h
.endprolog
;; Save the callee-saved xmm registers. movaps requires the 16-byte alignment established above.
movaps [rsp+0D0h], xmm6
movaps [rsp+0C0h], xmm7
movaps [rsp+0B0h], xmm8
movaps [rsp+0A0h], xmm9
movaps [rsp+090h], xmm10
movaps [rsp+080h], xmm11
movaps [rsp+070h], xmm12
movaps [rsp+060h], xmm13
movaps [rsp+050h], xmm14
movaps [rsp+040h], xmm15
;; Build the stack description: the two stack limit words read from gs:[8h] and gs:[10h],
;; followed (at the lowest address) by the current rsp.
mov rax, gs:[8h]
mov [rsp+30h], rax
mov rax, gs:[10h]
mov [rsp+28h], rax
mov [rsp+20h], rsp
;; Report the state of the old context. The value published is the address of the stack
;; description (rsp+20h, just above the shadow space), not rsp itself.
lea rax, [rsp+20h]
mov [rdx], rax
;; Switch to the new stack. The slot holds the address of that context's stack description,
;; so subtract the 20h bytes of shadow space to get its rsp.
mov rax, [rcx]
lea rsp, [rax-20h]
;; Clear the new stack's desc, so that it will be scanned using our stack pointer instead.
mov QWORD PTR [rcx], 0
;; Exported label (see the PUBLIC declaration at the top of the file). From here on rsp
;; refers to the frame of the context being resumed.
doSwitchReturnLoc::
;; Restore the stack limits recorded in the new context's stack description.
mov rax, [rsp+30h]
mov gs:[8h], rax
mov rax, [rsp+28h]
mov gs:[10h], rax
;; Reload the callee-saved xmm registers from the new frame.
movaps xmm6, [rsp+0D0h]
movaps xmm7, [rsp+0C0h]
movaps xmm8, [rsp+0B0h]
movaps xmm9, [rsp+0A0h]
movaps xmm10, [rsp+090h]
movaps xmm11, [rsp+080h]
movaps xmm12, [rsp+070h]
movaps xmm13, [rsp+060h]
movaps xmm14, [rsp+050h]
movaps xmm15, [rsp+040h]
;; Epilog: mirror image of the prolog, but executed on the new stack. The pops therefore load
;; the resumed context's callee-saved registers, and 'ret' continues at the return address
;; stored in its frame.
add rsp, 0E8h
pop r15
pop r14
pop r13
pop r12
pop rsi
pop rdi
pop rbx
pop rbp
ret
doSwitch endp
;; doStartThread: trampoline that fixes up stack alignment before entering a thread's
;; start function ('fn').
;;
;; In (this is the convention from 'pushContext'):
;;   rbx = address to continue at ('fn')
;;   rdi = value handed to 'fn' as its first argument (moved to rcx)
;;   rsi = value pushed as a simulated return address
;; rsp is 16-byte aligned on entry. 'fn' expects rsp to be 8 mod 16, just as if it had been
;; reached through a 'call', so one word is pushed. The pushed value is not expected to be
;; used as a return address, but it is necessary for stack traces.
doStartThread proc
        push    rsi                     ; simulated return address; also fixes alignment
        mov     rcx, rdi                ; first argument for 'fn'
        ;; Zero the registers we are done with, to avoid accidental GC retention.
        ;; Writing the 32-bit register clears the entire 64-bit register.
        xor     edi, edi
        xor     esi, esi
        jmp     rbx                     ; enter 'fn'; never comes back here
doStartThread endp
;; doEndDetour: preserve the argument registers, call doEndDetour2 to obtain a jump target,
;; then continue at that target with the argument registers and rsp exactly as at entry.
;;
;; In:  rcx, rdx, r8, r9, xmm0-xmm3 = arguments intended for the jump target (preserved).
;; Out: does not return here; control transfers to the address found in the target slot.
;; Not preserved: rax (holds the target), flags, and any other volatile register that
;; doEndDetour2 changes (r10, r11, xmm4 and xmm5 are not saved here).
;;
;; Frame layout (offsets from rsp after the prolog):
;;   00h-1Fh  shadow space for the call to doEndDetour2
;;   20h-3Fh  rcx, rdx, r8, r9
;;   40h-7Fh  xmm0, xmm1, xmm2, xmm3 (16 bytes each)
;;   80h      jump target; doEndDetour2 is expected to store it through the pointer it
;;            receives in rcx
;; With rsp being 8 mod 16 at entry, a frame of 88h bytes leaves rsp 16-byte aligned, which
;; both the call and the movaps instructions require.
DETOUR_FRAME_SIZE       equ 088h        ; total bytes reserved by doEndDetour
DETOUR_TARGET_SLOT      equ 080h        ; offset of the jump target inside the frame

doEndDetour proc frame
        sub     rsp, DETOUR_FRAME_SIZE
        .allocstack DETOUR_FRAME_SIZE
        .endprolog

        ;; Save all registers that may carry arguments (just above the shadow space).
        mov     [rsp+20h], rcx
        mov     [rsp+28h], rdx
        mov     [rsp+30h], r8
        mov     [rsp+38h], r9
        movaps  [rsp+40h], xmm0
        movaps  [rsp+50h], xmm1
        movaps  [rsp+60h], xmm2
        movaps  [rsp+70h], xmm3

        ;; Call the end detour function, passing the address of the target slot as its
        ;; only argument.
        lea     rcx, [rsp+DETOUR_TARGET_SLOT]
        call    doEndDetour2

        ;; Restore the argument registers.
        mov     rcx, [rsp+20h]
        mov     rdx, [rsp+28h]
        mov     r8, [rsp+30h]
        mov     r9, [rsp+38h]
        movaps  xmm0, [rsp+40h]
        movaps  xmm1, [rsp+50h]
        movaps  xmm2, [rsp+60h]
        movaps  xmm3, [rsp+70h]

        ;; Load the jump target before the epilog releases the frame.
        mov     rax, [rsp+DETOUR_TARGET_SLOT]

        ;; Epilog. The jump is emitted as 'rex.W jmp rax' (48 FF E0): the REX.W prefix does
        ;; not change what the jump does, but it is the form the Windows x64 unwinder
        ;; recognizes as a tail call ending an epilog. With a plain 'jmp rax', an unwind
        ;; started between the 'add' and the 'jmp' would apply .allocstack a second time.
        add     rsp, DETOUR_FRAME_SIZE
        db      048h                    ; REX.W prefix for the jmp below
        jmp     rax
doEndDetour endp
end
|