File: amd64nt.asm

package info (click to toggle)
ocaml 5.3.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 43,124 kB
  • sloc: ml: 355,439; ansic: 51,636; sh: 25,098; asm: 5,413; makefile: 3,673; python: 919; javascript: 273; awk: 253; perl: 59; fortran: 21; cs: 9
file content (690 lines) | stat: -rw-r--r-- 23,960 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
;**************************************************************************
;*                                                                        *
;*                                 OCaml                                  *
;*                                                                        *
;*             Xavier Leroy, projet Gallium, INRIA Rocquencourt           *
;*                                                                        *
;*   Copyright 2006 Institut National de Recherche en Informatique et     *
;*     en Automatique.                                                    *
;*                                                                        *
;*   All rights reserved.  This file is distributed under the terms of    *
;*   the GNU Lesser General Public License version 2.1, with the          *
;*   special exception on linking described in the file LICENSE.          *
;*                                                                        *
;**************************************************************************

; Asm part of the runtime system, AMD64 processor, Intel syntax

; Notes on Win64 calling conventions:
;     function arguments in RCX, RDX, R8, R9 / XMM0 - XMM3
;     caller must reserve 32 bytes of stack space
;     callee must preserve RBX, RBP, RSI, RDI, R12-R15, XMM6-XMM15

        EXTRN  caml_garbage_collection: NEAR
        EXTRN  caml_apply2: NEAR
        EXTRN  caml_apply3: NEAR
        EXTRN  caml_program: NEAR
        EXTRN  caml_array_bound_error_asm: NEAR
        EXTRN  caml_stash_backtrace: NEAR
        EXTRN  caml_try_realloc_stack: NEAR
        EXTRN  caml_try_realloc_stack: NEAR
        EXTRN  caml_exn_Stack_overflow: NEAR
        EXTRN  caml_raise_unhandled_effect: NEAR
        EXTRN  caml_raise_continuation_already_resumed: NEAR
        EXTRN  caml_free_stack: NEAR

; Load caml/domain_state.tbl (via domain_state.inc, to remove C-style comments)
        domain_curr_field = 0
DOMAIN_STATE MACRO _type:REQ, name:REQ
        domain_field_caml_&name EQU domain_curr_field
        domain_curr_field = domain_curr_field + 1
        ; Returning a value turns DOMAIN_STATE into a macro function, which
        ; causes the bracketed parameters to be both required and correctly
        ; parsed. Returning an empty string allows this to be used as though
        ; it were a macro procedure.
        EXITM <>
ENDM

INCLUDE domain_state.inc

; Caml_state(field) expands to the address of field in Caml_state, which is
; always stored in r14.
Caml_state MACRO field:REQ
        EXITM @CatStr(<[r14+>, %(domain_field_caml_&field), <*8]>)
ENDM

SAVE_ALL_REGS MACRO
    ; Save young_ptr
        mov     Caml_state(young_ptr), r15
    ; Now, use r15 to point to the gc_regs bucket
    ; We save r11 first to allow it to be scratch
        mov     r15, Caml_state(gc_regs_buckets)
        mov    qword ptr [r15 + 11*8], r11
        mov     r11,qword ptr [r15] ; next ptr
        mov     Caml_state(gc_regs_buckets), r11
        mov    qword ptr [r15 + 0*8], rax
        mov    qword ptr [r15 + 1*8], rbx
        mov    qword ptr [r15 + 2*8], rdi
        mov    qword ptr [r15 + 3*8], rsi
        mov    qword ptr [r15 + 4*8], rdx
        mov    qword ptr [r15 + 5*8], rcx
        mov    qword ptr [r15 + 6*8], r8
        mov    qword ptr [r15 + 7*8], r9
        mov    qword ptr [r15 + 8*8], r12
        mov    qword ptr [r15 + 9*8], r13
        mov    qword ptr [r15 + 10*8], r10
              ;qword ptr [r15 + 11*8] contains r11 already
        mov    qword ptr [r15 + 12*8], rbp
        movsd   mmword ptr [r15 + (0+13)*8], xmm0
        movsd   mmword ptr [r15 + (1+13)*8], xmm1
        movsd   mmword ptr [r15 + (2+13)*8], xmm2
        movsd   mmword ptr [r15 + (3+13)*8], xmm3
        movsd   mmword ptr [r15 + (4+13)*8], xmm4
        movsd   mmword ptr [r15 + (5+13)*8], xmm5
        movsd   mmword ptr [r15 + (6+13)*8], xmm6
        movsd   mmword ptr [r15 + (7+13)*8], xmm7
        movsd   mmword ptr [r15 + (8+13)*8], xmm8
        movsd   mmword ptr [r15 + (9+13)*8], xmm9
        movsd   mmword ptr [r15 + (10+13)*8], xmm10
        movsd   mmword ptr [r15 + (11+13)*8], xmm11
        movsd   mmword ptr [r15 + (12+13)*8], xmm12
        movsd   mmword ptr [r15 + (13+13)*8], xmm13
        movsd   mmword ptr [r15 + (14+13)*8], xmm14
        movsd   mmword ptr [r15 + (15+13)*8], xmm15
ENDM

RESTORE_ALL_REGS MACRO
    ; Restore rax, freeing up the next ptr slot
        mov     rax,qword ptr [r15 + 0*8]
        mov     r11, Caml_state(gc_regs_buckets)
        mov     qword ptr [r15], r11 ; next ptr
        mov     Caml_state(gc_regs_buckets), r15
    ; above:    rax,qword ptr [r15 + 0*8]
        mov     rbx,qword ptr [r15 + 1*8]
        mov     rdi,qword ptr [r15 + 2*8]
        mov     rsi,qword ptr [r15 + 3*8]
        mov     rdx,qword ptr [r15 + 4*8]
        mov     rcx,qword ptr [r15 + 5*8]
        mov     r8,qword ptr [r15 + 6*8]
        mov     r9,qword ptr [r15 + 7*8]
        mov     r12,qword ptr [r15 + 8*8]
        mov     r13,qword ptr [r15 + 9*8]
        mov     r10,qword ptr [r15 + 10*8]
        mov     r11,qword ptr [r15 + 11*8]
        mov     rbp,qword ptr [r15 + 12*8]
        movsd   xmm0, mmword ptr [r15 + (0+13)*8]
        movsd   xmm1, mmword ptr [r15 + (1+13)*8]
        movsd   xmm2, mmword ptr [r15 + (2+13)*8]
        movsd   xmm3, mmword ptr [r15 + (3+13)*8]
        movsd   xmm4, mmword ptr [r15 + (4+13)*8]
        movsd   xmm5, mmword ptr [r15 + (5+13)*8]
        movsd   xmm6, mmword ptr [r15 + (6+13)*8]
        movsd   xmm7, mmword ptr [r15 + (7+13)*8]
        movsd   xmm8, mmword ptr [r15 + (8+13)*8]
        movsd   xmm9, mmword ptr [r15 + (9+13)*8]
        movsd   xmm10, mmword ptr [r15 + (10+13)*8]
        movsd   xmm11, mmword ptr [r15 + (11+13)*8]
        movsd   xmm12, mmword ptr [r15 + (12+13)*8]
        movsd   xmm13, mmword ptr [r15 + (13+13)*8]
        movsd   xmm14, mmword ptr [r15 + (14+13)*8]
        movsd   xmm15, mmword ptr [r15 + (15+13)*8]
        mov     r15, Caml_state(young_ptr)
ENDM

SWITCH_OCAML_TO_C MACRO
    ; Fill in Caml_state->current_stack->sp
        mov     r10, Caml_state(current_stack)
        mov    qword ptr [r10], rsp
    ; Fill in Caml_state->c_stack
        mov     r11, Caml_state(c_stack)
        mov    qword ptr [r11 + 40], rsp
        mov    qword ptr [r11 + 32], r10
    ; Switch to C stack
        mov     rsp, qword ptr r11
ENDM

SWITCH_C_TO_OCAML MACRO
        mov     rsp,qword ptr [rsp+40]
ENDM

; Callee-save regs are rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15

PUSH_CALLEE_SAVE_REGS MACRO
        push    rbx
        push    rbp
        push    rsi
        push    rdi
        push    r12
        push    r13
        push    r14
        push    r15
        sub     rsp, 10*16       ; stack 16-aligned + 10 saved xmm regs
        movupd  xmmword ptr [rsp + 0*16], xmm6
        movupd  xmmword ptr [rsp + 1*16], xmm7
        movupd  xmmword ptr [rsp + 2*16], xmm8
        movupd  xmmword ptr [rsp + 3*16], xmm9
        movupd  xmmword ptr [rsp + 4*16], xmm10
        movupd  xmmword ptr [rsp + 5*16], xmm11
        movupd  xmmword ptr [rsp + 6*16], xmm12
        movupd  xmmword ptr [rsp + 7*16], xmm13
        movupd  xmmword ptr [rsp + 8*16], xmm14
        movupd  xmmword ptr [rsp + 9*16], xmm15
ENDM

POP_CALLEE_SAVE_REGS MACRO
        movupd  xmm6, xmmword ptr [rsp + 0*16]
        movupd  xmm7, xmmword ptr [rsp + 1*16]
        movupd  xmm8, xmmword ptr [rsp + 2*16]
        movupd  xmm9, xmmword ptr [rsp + 3*16]
        movupd  xmm10, xmmword ptr [rsp + 4*16]
        movupd  xmm11, xmmword ptr [rsp + 5*16]
        movupd  xmm12, xmmword ptr [rsp + 6*16]
        movupd  xmm13, xmmword ptr [rsp + 7*16]
        movupd  xmm14, xmmword ptr [rsp + 8*16]
        movupd  xmm15, xmmword ptr [rsp + 9*16]
        add     rsp, 10*16
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rdi
        pop     rsi
        pop     rbp
        pop     rbx
ENDM

RESTORE_EXN_HANDLER_OCAML MACRO
        mov     rsp, Caml_state(exn_handler)
        lea     r11, Caml_state(exn_handler)
        pop     qword ptr [r11]
ENDM

SWITCH_OCAML_STACKS MACRO
        mov     qword ptr [rsi], rsp
        mov     r12, Caml_state(exn_handler)
        mov     qword ptr [rsi+8], r12
        mov     Caml_state(current_stack), r10
        mov     rsp,qword ptr [r10]
        mov     r12,qword ptr [r10+8]
        mov     Caml_state(exn_handler), r12
ENDM

        .CODE

        PUBLIC  caml_system__code_begin
caml_system__code_begin:
        ret  ; just one instruction, so that debuggers don't display
             ; caml_system__code_begin instead of caml_call_gc

; Allocation

        PUBLIC  caml_call_realloc_stack
        ALIGN   4
caml_call_realloc_stack:
        SAVE_ALL_REGS
        mov     rcx,qword ptr [rsp+8]
        SWITCH_OCAML_TO_C
        call    caml_try_realloc_stack
        SWITCH_C_TO_OCAML
        cmp     rax, 0
        jz      L104
        RESTORE_ALL_REGS
        ret
L104:
        RESTORE_ALL_REGS
        lea     rax, caml_exn_Stack_overflow
        add     rsp, 16
        jmp     caml_raise_exn

        PUBLIC  caml_call_gc
        ALIGN   4
caml_call_gc:
        SAVE_ALL_REGS
        mov     Caml_state(gc_regs), r15
    ; Call the garbage collector
        SWITCH_OCAML_TO_C
        call    caml_garbage_collection
        SWITCH_C_TO_OCAML
        mov     r15, Caml_state(gc_regs)
        RESTORE_ALL_REGS
    ; Return to caller
        ret

        PUBLIC  caml_alloc1
        ALIGN   4
caml_alloc1:
        sub     r15, 16
        cmp     r15, Caml_state(young_limit)
        jb      caml_call_gc
        ret

        PUBLIC  caml_alloc2
        ALIGN   4
caml_alloc2:
        sub     r15, 24
        cmp     r15, Caml_state(young_limit)
        jb      caml_call_gc
        ret

        PUBLIC  caml_alloc3
        ALIGN   4
caml_alloc3:
        sub     r15, 32
        cmp     r15, Caml_state(young_limit)
        jb      caml_call_gc
        ret

        PUBLIC  caml_allocN
        ALIGN   4
caml_allocN:
        cmp     r15, Caml_state(young_limit)
        jb      caml_call_gc
        ret

; Call a C function from OCaml

; Update [young_limit] when returning from non-noalloc extern calls.
; Here is C code that can be used to generate RET_FROM_C_CALL for a
; new back-end.

;   #include <stdatomic.h>
;   #include <stdint.h>

;   typedef struct { _Atomic(uint64_t) young_limit;
;                    _Bool action_pending; } caml_domain_state;

;   void ret_from_c_call(caml_domain_state *dom_st)
;   {
;     if (__builtin_expect(dom_st->action_pending, 0))
;       atomic_store_explicit(&dom_st->young_limit, (uint64_t)-1,
;                             memory_order_relaxed);
;   }

RET_FROM_C_CALL MACRO
        LOCAL L1
        cmp     byte ptr Caml_state(action_pending), 0
        jne     L1
        ret
L1:
        mov     qword ptr Caml_state(young_limit), -1
        ret
ENDM

        PUBLIC  caml_c_call
        ALIGN   4
caml_c_call:
    ; Arguments:
    ;  C arguments         : rcx, rdx, r8 and r9
    ;  C function          : rax
        SWITCH_OCAML_TO_C
    ; Make the alloc ptr available to the C code
        mov     Caml_state(young_ptr), r15
    ; Call the function (address in rax)
        call    rax
    ; Prepare for return to OCaml
        mov     r15, Caml_state(young_ptr)
    ; Load OCaml stack and restore global variables
        SWITCH_C_TO_OCAML
    ; Return to OCaml caller
        RET_FROM_C_CALL

        PUBLIC  caml_c_call_stack_args
        ALIGN 4
caml_c_call_stack_args:
    ; Arguments:
    ;  C arguments         : rcx, rdx, r8 and r9
    ;    C function          : rax
    ;    C stack args        : begin=r13 end=r12
    ; Switch from OCaml to C
        SWITCH_OCAML_TO_C
    ; we use rbx (otherwise unused) to enable backtraces
        mov     rbx, rsp
    ; Make the alloc ptr available to the C code
        mov     Caml_state(young_ptr), r15
    ; Copy arguments from OCaml to C stack
        add     rsp, 32
L105:
        sub     r12, 8
        cmp     r12,r13
        jb      L210
        push    qword ptr [r12]
        jmp     L105
L210:
        sub     rsp, 32
    ; Call the function (address in %rax)
        call    rax
    ; Pop arguments back off the stack
        mov     rsp, Caml_state(c_stack)
    ; Prepare for return to OCaml
        mov     r15, Caml_state(young_ptr)
    ; Load ocaml stack and restore global variables
        SWITCH_C_TO_OCAML
    ; Return to OCaml caller
        RET_FROM_C_CALL

; Start the OCaml program

        PUBLIC  caml_start_program
        ALIGN   4
caml_start_program:
    ; Save callee-save registers
        PUSH_CALLEE_SAVE_REGS
    ; First argument (rcx) is Caml_state. Load it in r14
        mov     r14, rcx
    ; Initial entry point is caml_program
        lea     r12, caml_program
    ; Common code for caml_start_program and caml_callback*
L106:
    ; Load young_ptr into %r15
        mov     r15, Caml_state(young_ptr)
    ; Build struct c_stack_link on the C stack
        sub     rsp, 56 ; sizeof struct c_stack_link
        mov     qword ptr [rsp + 32], 0
        mov     qword ptr [rsp + 40], 0
        mov     r10, Caml_state(c_stack)
        mov     qword ptr [rsp + 48], r10
        mov     Caml_state(c_stack), rsp
    ; Load the OCaml stack.
        mov     r11, Caml_state(current_stack)
        mov     r10, qword ptr [r11]
    ; Store the stack pointer to allow DWARF unwind XXX
        sub     r10, 16
        mov     qword ptr [r10], rsp ; C_STACK_SP
    ; Store the gc_regs for callbacks during a GC
        mov     r11, Caml_state(gc_regs)
        mov     qword ptr [r10 + 8], r11
    ; Build a handler for exceptions raised in OCaml on the OCaml stack.
        sub     r10, 16
        lea     r11, L108
        mov     qword ptr [r10 + 8], r11
    ; link in the previous exn_handler so that copying stacks works
        mov     r11, Caml_state(exn_handler)
        mov     qword ptr [r10], r11
        mov     Caml_state(exn_handler), r10
    ; Switch stacks and call the OCaml code
        mov     rsp, r10
        call    r12
L107:
    ; Pop the exception handler
        mov     r11, qword ptr [rsp]
        mov     Caml_state(exn_handler), r11
        lea     r10, [rsp+16]
L109:
    ; Restore GC regs
        mov     r11, qword ptr [r10+8]
        mov     Caml_state(gc_regs), r11
        add     r10, 16
    ; Update alloc ptr
        mov     Caml_state(young_ptr), r15
    ; Return to C stack.
        mov     r11, Caml_state(current_stack)
        mov     qword ptr [r11], r10
        mov     rsp, Caml_state(c_stack)
    ; Pop the struct c_stack_link
        mov     r10, qword ptr [rsp+48]
        mov     Caml_state(c_stack), r10
        add     rsp, 56
    ; Restore callee-save registers.
        POP_CALLEE_SAVE_REGS
    ; Return to caller
        ret
L108:
    ; Exception handler
    ; Mark the bucket as an exception result and return it
        or      rax, 2
        ; exn handler already popped here
        mov     r10, rsp
        jmp     L109

; Raise an exception from OCaml

        PUBLIC  caml_raise_exn
        ALIGN   4
caml_raise_exn:
        test    qword ptr Caml_state(backtrace_active), 1
        jne     L110
        RESTORE_EXN_HANDLER_OCAML
        ret
L110:
        mov     qword ptr Caml_state(backtrace_pos), 0
L117:
        mov     r13, rsp             ; Save OCaml stack pointer
        mov     r12, rax             ; Save exception bucket
        mov     rsp, Caml_state(c_stack)
        mov     rcx, rax             ; Arg 1: exception bucket
        mov     rdx, qword ptr [r13] ; Arg 2: PC of raise
        lea     r8, [r13+8]          ; Arg 3: SP of raise
        mov     r9, Caml_state(exn_handler) ; Arg 4: SP of handler
        call    caml_stash_backtrace
        mov     rax, r12             ; Recover exception bucket
        RESTORE_EXN_HANDLER_OCAML
        ret

        PUBLIC  caml_reraise_exn
        ALIGN   4
caml_reraise_exn:
        test     qword ptr Caml_state(backtrace_active), 1
        jne     L117
        RESTORE_EXN_HANDLER_OCAML
        ret

; Raise an exception from C

        PUBLIC  caml_raise_exception
        ALIGN   4
caml_raise_exception:
        mov     r14, rcx                   ; First argument is Caml_state
        mov     rax, rdx                   ; Second argument is exn bucket
        mov     r15, Caml_state(young_ptr) ; Reload alloc ptr
    ; Discard the C stack pointer and reset to OCaml stack
        mov     r10, Caml_state(current_stack)
        mov     rsp, qword ptr [r10]
        jmp     caml_raise_exn

; Callback from C to OCaml

        PUBLIC  caml_callback_asm
        ALIGN   4
caml_callback_asm:
        PUSH_CALLEE_SAVE_REGS
    ; Initial loading of arguments
        mov     r14, rcx      ; Caml_state
        mov     rbx, rdx      ; closure
        mov     rax, qword ptr [r8]     ; argument
        mov     r12, qword ptr [rbx]    ; code pointer
        mov     rdi, 0 ; XXX dummy?
        mov     rsi, 0 ; XXX dummy?
        jmp     L106

        PUBLIC  caml_callback2_asm
        ALIGN   4
caml_callback2_asm:
        PUSH_CALLEE_SAVE_REGS
    ; Initial loading of arguments
        mov     r14, rcx        ; Caml_state
        mov     rdi, rdx        ; closure
        mov     rax, qword ptr [r8]       ; first argument
        mov     rbx, qword ptr [r8 + 8]   ; second argument
        lea     r12, caml_apply2  ; code pointer
        mov     rsi, 0            ; XXX dummy?
        jmp     L106

        PUBLIC  caml_callback3_asm
        ALIGN   4
caml_callback3_asm:
        PUSH_CALLEE_SAVE_REGS
    ; Initial loading of arguments
        mov     r14, rcx        ; Caml_state
        mov     rax, qword ptr [r8]       ; first argument
        mov     rbx, qword ptr [r8 + 8]   ; second argument
        mov     rsi, rdx        ; closure
        mov     rdi, qword ptr [r8 + 16]  ; third argument
        lea     r12, caml_apply3      ; code pointer
        jmp     L106

; Fibers

        PUBLIC  caml_perform
        ALIGN   4
caml_perform:
    ;  %rax: effect to perform
    ;  %rbx: freshly allocated continuation
        mov     rsi, Caml_state(current_stack) ; %rsi := old stack
        lea     rdi, [rsi + 1] ; %rdi (last_fiber) := Val_ptr(old stack)
        mov     qword ptr [rbx], rdi ; Initialise continuation
do_perform:
    ;  %rax: effect to perform
    ;  %rbx: continuation
    ;  %rdi: last_fiber
    ;  %rsi: old stack *;
        mov     qword ptr [rbx+8], rdi  ; Set last fiber field in continuation
        mov     r11, qword ptr [rsi+16] ; %r11 := old stack -> handler
        mov     r10, qword ptr [r11+24] ; %r10 := parent stack
        cmp     r10, 0                  ; parent is NULL?
        je      L112
        SWITCH_OCAML_STACKS ; preserves r11 and rsi
     ; We have to null the Handler_parent after the switch because the
     ; Handler_parent is needed to unwind the stack for backtraces
        mov     qword ptr [r11+24], 0 ; Set parent of performer to NULL
        mov     rsi, qword ptr [r11+16]  ; %rsi := effect handler
        jmp     caml_apply3
L112:
    ; Switch back to original performer before raising Effect.Unhandled
    ; (no-op unless this is a reperform)
        mov     r10, qword ptr [rbx]  ; load performer stack from continuation
        sub     r10, 1       ; r10 := Ptr_val(r10)
        mov     rsi, Caml_state(current_stack)
        SWITCH_OCAML_STACKS
    ; No parent stack. Raise Effect.Unhandled.
        mov     rcx, rax
        lea     rax, caml_raise_unhandled_effect
        jmp     caml_c_call

        PUBLIC  caml_reperform
        ALIGN   4
caml_reperform:
    ;  %rax: effect to reperform
    ;  %rbx: continuation
    ;  %rdi: last_fiber
        mov     rsi, Caml_state(current_stack)    ; %rsi := old stack
        mov     r10, qword ptr [rdi+15]
        mov     qword ptr [r10+24], rsi       ; Append to last_fiber
        lea     rdi, [rsi + 1]  ; %rdi (last_fiber) := Val_ptr(old stack)
        jmp     do_perform

        PUBLIC  caml_resume
        ALIGN   4
caml_resume:
    ; %rax -> fiber, %rbx -> fun, %rdi -> arg, %rsi -> last_fiber
        lea     r10, [rax-1]  ; %r10 (new stack) = Ptr_val(%rax)
        mov     rax, rdi      ; %rax := argument to the function in %rbx
    ;  check if stack null, then already used
        test    r10, r10
        jz      L502
    ; Add current stack to the last fiber
        mov     rdi, qword ptr [rsi+15]
        mov     rsi, Caml_state(current_stack)
        mov     qword ptr [rdi+24], rsi
        SWITCH_OCAML_STACKS
        jmp     qword ptr [rbx]
L502:
        lea     rax, caml_raise_continuation_already_resumed
        jmp     caml_c_call

        PUBLIC  caml_runstack
        ALIGN   4
caml_runstack:
    ; %rax -> fiber, %rbx -> fun, %rdi -> arg
        and     rax, -2       ; %rax = Ptr_val(%rax)
    ; save old stack pointer and exception handler
        mov     rcx, Caml_state(current_stack)
        mov     r10, Caml_state(exn_handler)
        mov     qword ptr [rcx], rsp
        mov     qword ptr [rcx+8], r10
    ; Load new stack pointer and set parent
        mov     r11, qword ptr [rax+16]
        mov     qword ptr [r11+24], rcx
        mov     Caml_state(current_stack), rax
        mov     r11, qword ptr [rax]
    ; Create an exception handler on the target stack
    ;  after 16byte DWARF & gc_regs block (which is unused here)
        sub     r11, 32
        lea     r10, fiber_exn_handler
        mov     qword ptr [r11+8], r10
    ; link the previous exn_handler so that copying stacks works
        mov     r10, qword ptr [rax+8]
        mov     qword ptr [r11], r10
        mov     Caml_state(exn_handler), r11
    ; Switch to the new stack
        mov     rsp, r11
        mov     rax, rdi ; first argument
        call    qword ptr [rbx] ; closure in %rbx (second argument)
frame_runstack:
        lea     r11, [rsp+32] ; SP with exn handler popped
        mov     rbx, qword ptr [r11]
L610:
        mov     rcx, Caml_state(current_stack) ; arg to caml_free_stack
    ; restore parent stack and exn_handler into Caml_state
        mov     r10, qword ptr [r11+24]
        mov     r11, qword ptr [r10+8]
        mov     Caml_state(current_stack), r10
        mov     Caml_state(exn_handler), r11
    ; free old stack by switching directly to c_stack; is a no-alloc call
        mov     r13, qword ptr [r10]    ; saved across C call
        mov     r12, rax ; save %rax across C call
        mov     rsp, Caml_state(c_stack)
        call  caml_free_stack
    ; switch directly to parent stack with correct return
        mov     rsp, r13
        mov     rax, r12
    ; Invoke handle_value (or handle_exn)
        jmp     qword ptr [rbx]
fiber_exn_handler:
        lea     r11, [rsp+16]
        mov     rbx, qword ptr [r11+8]
        jmp     L610

        PUBLIC  caml_ml_array_bound_error
        ALIGN   4
caml_ml_array_bound_error:
        lea     rax, caml_array_bound_error_asm
        jmp     caml_c_call

        PUBLIC  caml_assert_stack_invariants
        ALIGN   4
caml_assert_stack_invariants:
        mov     r11, Caml_state(current_stack)
        mov     r10, rsp
        sub     r10, r11
        cmp     r10, 296
        jge     L310
        int     3
L310:
        ret

        PUBLIC caml_system__code_end
caml_system__code_end:

        .DATA
        PUBLIC  caml_system$frametable
caml_system$frametable LABEL QWORD
        QWORD   2           ; two descriptors
        QWORD   L107        ; return address into callback
        WORD    -1          ; negative frame size => use callback link
        WORD    0           ; no roots here
        ALIGN   8
        QWORD   frame_runstack
        WORD    -1
        WORD    0

        PUBLIC  caml_negf_mask
        ALIGN   16
caml_negf_mask LABEL QWORD
        QWORD   8000000000000000H, 0

        PUBLIC  caml_absf_mask
        ALIGN   16
caml_absf_mask LABEL QWORD
        QWORD   7FFFFFFFFFFFFFFFH, 0FFFFFFFFFFFFFFFFH

        END