File: as_callfunc_arm_gcc.S

package info (click to toggle)
supertuxkart 1.4%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 767,580 kB
  • sloc: cpp: 412,075; xml: 106,334; ansic: 83,792; asm: 1,559; python: 1,403; sh: 1,366; objc: 452; makefile: 333; javascript: 23; awk: 20
file content (731 lines) | stat: -rw-r--r-- 26,647 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
/*
  AngelCode Scripting Library
  Copyright (c) 2003-2020 Andreas Jonsson

  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any
  damages arising from the use of this software.

  Permission is granted to anyone to use this software for any
  purpose, including commercial applications, and to alter it and
  redistribute it freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you
     must not claim that you wrote the original software. If you use
     this software in a product, an acknowledgment in the product
     documentation would be appreciated but is not required.

  2. Altered source versions must be plainly marked as such, and
     must not be misrepresented as being the original software.

  3. This notice may not be removed or altered from any source
     distribution.

  The original version of this library can be located at:
  http://www.angelcode.com/angelscript/

  Andreas Jonsson
  andreas@angelcode.com
*/

/*
   Assembly routines for the ARM call convention
   Written by Fredrik Ehnbom in June 2009

   Adapted to GNUC by darktemplar216 in September 2009

   Modified by Lasse Oorni for 8-byte stack alignment in May 2012

   The assembler routines for Linux were written by Carlos Luna in December 2012
*/

#if !defined(AS_MAX_PORTABILITY)

#if defined(__arm__) || defined(__ARM__) || defined(I3D_ARCH_ARM)

#if !defined(__linux__) || defined(__ANDROID__) || defined(ANDROID) || defined(__SOFTFP__) || defined(__ARM_PCS)

/* The code for iOS, Android, Marmalade, and Linux with the soft-float ABI goes here */

.global armFunc
.global armFuncR0
.global armFuncR0R1
.global armFuncObjLast
.global armFuncR0ObjLast

/* --------------------------------------------------------------------------------------------*/
/*
 * armFunc (core-register argument passing only)
 *
 * In:   r0 = pointer to the argument table (32-bit words)
 *       r1 = size of the argument table in bytes (assumed to be a multiple of 4)
 *       r2 = address of the function to call
 * Out:  r0/r1 are not touched after the call, so whatever the callee
 *       returned in them is handed straight back to our caller.
 *
 * The first four words of the table go to r0-r3. Any remaining words are
 * copied into a stack frame whose size is rounded up to a multiple of 8.
 *
 * Register roles: r4 = callee address, r5 = copy temp, r6 = read cursor,
 * r7 = bytes left, r8 = size of the outgoing-argument frame (0 if none),
 * r12 = write cursor for the stack copy.
 */
armFunc:
    stmdb   sp!, {r4-r8, lr}  /* six registers = 24 bytes, a multiple of 8 */
    mov     r6, r0  /* arg table */
    movs    r7, r1  /* arg size (also set the condition code flags so that we detect if there are no arguments) */
    mov     r4, r2  /* function address */
    mov     r8, #0  /* no stack frame allocated yet; does not disturb the flags from movs */

    beq     nomoreargs

    /* Load the first 4 arguments into r0-r3 */
    cmp     r7, #4
    ldrge   r0, [r6],#4
    cmp     r7, #2*4
    ldrge   r1, [r6],#4
    cmp     r7, #3*4
    ldrge   r2, [r6],#4
    cmp     r7, #4*4
    ldrge   r3, [r6],#4
    ble     nomoreargs        /* 16 bytes or less: everything fitted into registers */

    /* Load the rest of the arguments onto the stack */
    sub     r7, r7, #4*4      /* skip the 4 registers already loaded into r0-r3 */
    add     r8, r7, #4        /* ensure 8-byte stack alignment: round r7 up to a multiple of 8 */
    bic     r8, r8, #4
    sub     sp, sp, r8
    /* Copy through r12 and leave sp at the frame base. Using sp itself as a
       post-incrementing cursor would leave the words already copied below sp,
       where an asynchronous signal frame could overwrite them. */
    mov     r12, sp
stackargsloop:
    ldr     r5, [r6], #4
    str     r5, [r12], #4
    subs    r7, r7, #4
    bne     stackargsloop
nomoreargs:
#if defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_4__)
    mov    lr, pc   /* older ARM didn't support blx */
    mov    pc, r4
#else
    blx     r4
#endif
    add     sp, sp, r8        /* release the outgoing-argument frame (r8 is 0 if none was made) */
    ldmia   sp!, {r4-r8, pc}

/* --------------------------------------------------------------------------------------------*/
/*
 * armFuncObjLast (core-register argument passing only)
 *
 * Like armFunc, but an extra word ("objlast") is appended after the last
 * argument from the table.
 *
 * In:   r0 = pointer to the argument table (32-bit words)
 *       r1 = size of the argument table in bytes (assumed to be a multiple of 4)
 *       r2 = address of the function to call
 *       r3 = objlast
 * Out:  r0/r1 are left exactly as the callee returned them.
 *
 * If the table holds fewer than four words, objlast lands in the first free
 * register (it is also written to the registers after it, which is harmless).
 * Otherwise objlast is placed on the stack after the remaining table words.
 *
 * Register roles: r4 = callee address, r5 = objlast / copy temp,
 * r6 = read cursor, r7 = bytes left, r8 = outgoing frame size,
 * r12 = frame top, then write cursor.
 */
armFuncObjLast:
    stmdb   sp!, {r4-r8, lr}
    mov     r6, r0  /* arg table */
    movs    r7, r1  /* arg size (also set the condition code flags so that we detect if there are no arguments) */
    mov     r4, r2  /* function address */
    mov     r8, #0

    mov     r0, r3          /* objlast. might get overwritten */
    mov     r5, r3          /* objlast to temp reg */

    beq     nomoreargsarmFuncObjLast

    /* Load the first 4 arguments into r0-r3 */
    cmp     r7, #4
    ldrge   r0, [r6],#4
    cmp     r7, #2*4
    ldrge   r1, [r6],#4
    movlt   r1, r5
    cmp     r7, #3*4
    ldrge   r2, [r6],#4
    movlt   r2, r5
    cmp     r7, #4*4
    ldrge   r3, [r6],#4
    movlt   r3, r5
    blt     nomoreargsarmFuncObjLast    /* objlast found a register */

    /* Load the rest of the arguments onto the stack */
    sub     r7, r7, #4*4    /* skip the 4 registers already loaded into r0-r3 */
    add     r8, r7, #8      /* account for the objlast pointer, ensure 8-byte stack alignment */
    bic     r8, r8, #4
    sub     sp, sp, r8      /* allocate the frame first so nothing is ever written below sp */
    add     r12, sp, r8     /* r12 = top of the new frame (the sp value before the allocation) */
    str     r5, [r12,#-4]   /* store the objlast on stack, twice in case we adjusted alignment */
    str     r5, [r12,#-8]   /* (the copy loop below overwrites this slot when there is no padding) */
    cmp     r7, #0          /* we may also have come here with no extra params */
    beq     nomoreargsarmFuncObjLast
    mov     r12, sp         /* r12 = write cursor; sp stays at the frame base during the copy */
stackargslooparmFuncObjLast:
    ldr     r5, [r6], #4
    str     r5, [r12], #4
    subs    r7, r7, #4
    bne     stackargslooparmFuncObjLast
nomoreargsarmFuncObjLast:
#if defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_4__)
    mov    lr, pc   /* older ARM didn't support blx */
    mov    pc, r4
#else
    blx     r4
#endif
    add     sp, sp, r8      /* release the outgoing-argument frame (r8 is 0 if none was made) */
    ldmia   sp!, {r4-r8, pc}

/* --------------------------------------------------------------------------------------------*/
/*
 * armFuncR0ObjLast (core-register argument passing only)
 *
 * Calls the target with r0 forced to a caller-supplied value, the table
 * words following in r1-r3 and on the stack, and "objlast" appended last.
 *
 * In:   r0 = pointer to the argument table (32-bit words)
 *       r1 = size of the argument table in bytes (assumed to be a multiple of 4)
 *       r2 = address of the function to call
 *       r3 = value to pass in r0
 *       first stack word on entry = objlast
 * Out:  r0/r1 are left exactly as the callee returned them.
 *
 * Register roles: r4 = callee address, r5 = objlast / copy temp,
 * r6 = read cursor, r7 = bytes left, r8 = outgoing frame size,
 * r12 = frame top, then write cursor.
 */
armFuncR0ObjLast:
    stmdb   sp!, {r4-r8, lr}
    ldr     r5, [sp,#6*4]   /* objlast to temp reg (it sits just above the six registers saved above) */

    mov     r6, r0  /* arg table */
    movs    r7, r1  /* arg size (also set the condition code flags so that we detect if there are no arguments) */
    mov     r4, r2  /* function address */
    mov     r8, #0

    mov     r0, r3      /* r0 explicitly set */
    mov     r1, r5      /* objlast.  might get overwritten */

    beq     nomoreargsarmFuncR0ObjLast

    /* Load the first 3 arguments into r1-r3 */
    cmp     r7, #1*4
    ldrge   r1, [r6],#4
    cmp     r7, #2*4
    ldrge   r2, [r6],#4
    movlt   r2, r5
    cmp     r7, #3*4
    ldrge   r3, [r6],#4
    movlt   r3, r5
    blt     nomoreargsarmFuncR0ObjLast  /* objlast found a register */

    /* Load the rest of the arguments onto the stack */
    sub     r7, r7, #3*4    /* skip the 3 registers already loaded into r1-r3 */
    add     r8, r7, #8      /* account for the objlast pointer, ensure 8-byte stack alignment */
    bic     r8, r8, #4
    sub     sp, sp, r8      /* allocate the frame first so nothing is ever written below sp */
    add     r12, sp, r8     /* r12 = top of the new frame (the sp value before the allocation) */
    str     r5, [r12,#-4]   /* store the objlast on stack, twice in case we adjusted alignment */
    str     r5, [r12,#-8]   /* (the copy loop below overwrites this slot when there is no padding) */
    cmp     r7, #0          /* we may also have come here with no extra params */
    beq     nomoreargsarmFuncR0ObjLast
    mov     r12, sp         /* r12 = write cursor; sp stays at the frame base during the copy */
stackargslooparmFuncR0ObjLast:
    ldr     r5, [r6], #4
    str     r5, [r12], #4
    subs    r7, r7, #4
    bne     stackargslooparmFuncR0ObjLast
nomoreargsarmFuncR0ObjLast:
#if defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_4__)
    mov    lr, pc   /* older ARM didn't support blx */
    mov    pc, r4
#else
    blx     r4
#endif
    add     sp, sp, r8      /* release the outgoing-argument frame (r8 is 0 if none was made) */
    ldmia   sp!, {r4-r8, pc}

/* --------------------------------------------------------------------------------------------*/
/*
 * armFuncR0 (core-register argument passing only)
 *
 * Calls the target with r0 forced to a caller-supplied value and the table
 * words following in r1-r3 and then on the stack.
 *
 * In:   r0 = pointer to the argument table (32-bit words)
 *       r1 = size of the argument table in bytes (assumed to be a multiple of 4)
 *       r2 = address of the function to call
 *       r3 = value to pass in r0
 * Out:  r0/r1 are left exactly as the callee returned them.
 *
 * Register roles: r4 = callee address, r5 = copy temp, r6 = read cursor,
 * r7 = bytes left, r8 = outgoing frame size (0 if none), r12 = write cursor.
 */
armFuncR0:
    stmdb   sp!, {r4-r8, lr}
    mov     r6, r0  /* arg table */
    movs    r7, r1  /* arg size (also set the condition code flags so that we detect if there are no arguments) */
    mov     r4, r2  /* function address */
    mov     r8, #0

    mov     r0, r3  /* r0 explicitly set */

    beq     nomoreargsarmFuncR0

    /* Load the first 3 arguments into r1-r3 */
    cmp     r7, #1*4
    ldrge   r1, [r6],#4
    cmp     r7, #2*4
    ldrge   r2, [r6],#4
    cmp     r7, #3*4
    ldrge   r3, [r6],#4
    ble     nomoreargsarmFuncR0     /* 12 bytes or less: everything fitted into registers */

    /* Load the rest of the arguments onto the stack */
    sub     r7, r7, #3*4    /* skip the 3 registers already loaded into r1-r3 */
    add     r8, r7, #4      /* ensure 8-byte stack alignment: round r7 up to a multiple of 8 */
    bic     r8, r8, #4
    sub     sp, sp, r8
    /* Copy through r12 and leave sp at the frame base. Using sp itself as a
       post-incrementing cursor would leave the words already copied below sp,
       where an asynchronous signal frame could overwrite them. */
    mov     r12, sp
stackargslooparmFuncR0:
    ldr     r5, [r6], #4
    str     r5, [r12], #4
    subs    r7, r7, #4
    bne     stackargslooparmFuncR0
nomoreargsarmFuncR0:
#if defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_4__)
    mov    lr, pc   /* older ARM didn't support blx */
    mov    pc, r4
#else
    blx     r4
#endif
    add     sp, sp, r8      /* release the outgoing-argument frame (r8 is 0 if none was made) */
    ldmia   sp!, {r4-r8, pc}

/* --------------------------------------------------------------------------------------------*/
/*
 * armFuncR0R1 (core-register argument passing only)
 *
 * Calls the target with r0 and r1 forced to caller-supplied values and the
 * table words following in r2-r3 and then on the stack.
 *
 * In:   r0 = pointer to the argument table (32-bit words)
 *       r1 = size of the argument table in bytes (assumed to be a multiple of 4)
 *       r2 = address of the function to call
 *       r3 = value to pass in r0
 *       first stack word on entry = value to pass in r1
 * Out:  r0/r1 are left exactly as the callee returned them.
 *
 * Register roles: r4 = callee address, r5 = copy temp, r6 = read cursor,
 * r7 = bytes left, r8 = outgoing frame size (0 if none), r12 = write cursor.
 */
armFuncR0R1:
    stmdb   sp!, {r4-r8, lr}
    mov     r6, r0  /* arg table */
    movs    r7, r1  /* arg size (also set the condition code flags so that we detect if there are no arguments) */
    mov     r4, r2  /* function address */
    mov     r8, #0

    mov     r0, r3          /* r0 explicitly set */
    ldr     r1, [sp, #6*4]  /* r1 explicitly set too (read from just above the six saved registers) */

    beq     nomoreargsarmFuncR0R1

    /* Load the first 2 arguments into r2-r3 */
    cmp     r7, #1*4
    ldrge   r2, [r6],#4
    cmp     r7, #2*4
    ldrge   r3, [r6],#4
    ble     nomoreargsarmFuncR0R1   /* 8 bytes or less: everything fitted into registers */

    /* Load the rest of the arguments onto the stack */
    sub     r7, r7, #2*4    /* skip the 2 registers already loaded into r2-r3 */
    add     r8, r7, #4      /* ensure 8-byte stack alignment: round r7 up to a multiple of 8 */
    bic     r8, r8, #4
    sub     sp, sp, r8
    /* Copy through r12 and leave sp at the frame base. Using sp itself as a
       post-incrementing cursor would leave the words already copied below sp,
       where an asynchronous signal frame could overwrite them. */
    mov     r12, sp
stackargslooparmFuncR0R1:
    ldr     r5, [r6], #4
    str     r5, [r12], #4
    subs    r7, r7, #4
    bne     stackargslooparmFuncR0R1
nomoreargsarmFuncR0R1:
#if defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_4__)
    mov    lr, pc   /* older ARM didn't support blx */
    mov    pc, r4
#else
    blx     r4
#endif
    add     sp, sp, r8      /* release the outgoing-argument frame (r8 is 0 if none was made) */
    ldmia   sp!, {r4-r8, pc}

/* --------------------------------------------------------------------------------------------*/
#elif defined(__linux__) && !defined(__SOFTFP__) && !defined(__ARM_PCS)

/* The Linux with hard-float ABI code goes here */


/* These codes are suitable for armeabi + vfp  / armeabihf */
/* when using armeabi + vfp, please set C_FLAGS -mfloat-abi=softfp -mfpu=vfp */
/* using armeabihf, please set C_FLAGS -mfloat-abi=hard -mfpu=vfpv3-d16 */

/* To build this file in ARM mode, add -marm to C_FLAGS. */
/* To build it in Thumb mode, add -mthumb -Wa,-mimplicit-it=thumb to C_FLAGS. */


/* SP is a multiple of 8 when control first enters a program.*/
/* This places an obligation on authors of low level OS, RTOS, and runtime library code to align SP at all points */
/* at which control first enters a body of (AAPCS-conforming) code. (please read "ARM IHI 0046B" document)*/


.section .text

    .align 2        /* Align the function code to a 4-byte (2^n) word boundary. */
#if defined(__thumb__) || defined(__thumb2__)
    .thumb
    .syntax unified
#else
    .arm            /* Use ARM instructions instead of Thumb.*/
#endif
    .fpu vfp
    .globl armFunc  /* Make the function globally accessible.*/
/*
 * armFunc (VFP register argument passing)
 *
 * In:   r0 = pointer to the argument table
 *       r1 = number of bytes of core-register arguments in the table
 *       r2 = address of the function to call
 * Out:  r0/r1 are left exactly as the callee returned them; d0-d7 as left by
 *       the callee are written back to the table at offset 272.
 *
 * Table layout as this routine uses it (byte offsets from r0):
 *     0 ..  15   words for r0-r3
 *    16 ..       words to copy to the stack
 *   268          number of bytes to copy to the stack
 *   272 .. 335   values for d0-d7 (overwritten with d0-d7 after the call)
 *
 * Register roles: r4 = callee address, r5 = copy temp, r6 = table / read
 * cursor, r7 = bytes left, r8 = outgoing frame size (0 if none),
 * r10 = address of the VFP area, r12 = frame base, then write cursor.
 */
armFunc:
    push    {r4-r8, r10, r11, lr}   /* sp must be 8-byte alignment for ABI compliance, so the pushed registers must be even */

    mov     r6, r0  /* arg table */
    movs    r7, r1  /* arg size (also set the condition code flags so that we detect if there are no arguments) */
    mov     r4, r2  /* function address */

    /* Load float and double args into d0-d7 and s0-s15 */
    add       r10, r6, #272 /* r10 (r6 + 272) points to the first value for the VFP registers */
    mov       r8, #0
    vldmia.64 r10, {d0-d7}  /* Load contents starting at r10 into registers d0-d7 */

    /* If there are no arguments to set into r0-r3 */
    /* go check if there are arguments for the stack */
    beq     stackargs

    /* Load the first 4 arguments into r0-r3 */
    cmp     r7, #4
    ldrge   r0, [r6]
    cmp     r7, #8
    ldrge   r1, [r6, #4]
    cmp     r7, #12
    ldrge   r2, [r6, #8]
    cmp     r7, #16
    ldrge   r3, [r6, #12]

stackargs:
    ldr     r5, [r6, #268]  /* Load stack size into r5 */
    movs    r7, r5          /* Load stack size into r7, checking for 0 args */

    /* If there are no args for the stack, branch */
    beq     nomoreargs

    /* Load the rest of the arguments onto the stack */
    /* Ensure 8-byte stack alignment */
    mov     r8, sp          /* remember the sp we started from */
    sub     sp, sp, r7
    add     r6, r6, #16     /* Set r6 to point to the first arg to be placed on the stack */

    sub     r12, sp, #8
    bic     r12, r12, #7    /* thumb mode couldn't support "bic  sp, sp, #7" instruction */
    sub     r8, r8, r12     /* r8 = total frame size, added back to sp after the call */
    mov     sp, r12         /* sp = aligned frame base; r12(ip) is not callee saved register */

    /* Copy through r12 and leave sp at the frame base. Using sp itself as a
       post-incrementing cursor would leave the words already copied below sp,
       where an asynchronous signal frame could overwrite them. */
stackargsloop:
    ldr     r5, [r6], #4
    subs    r7, r7, #4
    str     r5, [r12], #4
    bne     stackargsloop

nomoreargs:
#if defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_4__)
    mov    lr, pc   /* older ARM didn't support blx */
    mov    pc, r4
#else
    blx     r4
#endif
    add     sp, sp, r8      /* release the outgoing-argument frame (r8 is 0 if none was made) */
    vstmia.64 r10, {d0-d7}   /* Copy contents of registers d0-d7 to the address stored in r10 */

    pop {r4-r8, r10, r11, pc}

/* --------------------------------------------------------------------------------------------*/
    .align 2        /* Align the function code to a 4-byte (2^n) word boundary. */
#if defined(__thumb__) || defined(__thumb2__)
    .thumb
    .syntax unified
#else
    .arm            /* Use ARM instructions instead of Thumb.*/
#endif
    .globl armFuncObjLast       /* Make the function globally accessible.*/
/*
 * armFuncObjLast (VFP register argument passing)
 *
 * Like armFunc, but an extra word ("objlast") is passed after the
 * core-register arguments from the table.
 *
 * In:   r0 = pointer to the argument table
 *       r1 = number of bytes of core-register arguments in the table
 *       r2 = address of the function to call
 *       r3 = objlast
 * Out:  r0/r1 are left exactly as the callee returned them; d0-d7 as left by
 *       the callee are written back to the table at offset 272.
 *
 * Table layout as this routine uses it (byte offsets from r0):
 *     0 ..  15   words for r0-r3
 *    16 ..       words to copy to the stack
 *   268          number of bytes to copy to the stack
 *   272 .. 335   values for d0-d7 (overwritten with d0-d7 after the call)
 *
 * If fewer than four register words are present, objlast goes into the first
 * free register. Otherwise it is written into the table slot at offset 12
 * (whose value is already in r3) and the stack copy starts one word early,
 * so objlast becomes the first word on the stack. Note that this modifies
 * the caller's table.
 *
 * Register roles: r4 = callee address, r5 = objlast, then extra-byte count,
 * then copy temp; r6 = table / read cursor, r7 = bytes left,
 * r8 = outgoing frame size, r10 = address of the VFP area,
 * r12 = frame base, then write cursor.
 */
armFuncObjLast:
    push {r4-r8, r10, r11, lr}  /* We're storing r11 just to keep the stack aligned to an 8 byte boundary */

    mov     r6, r0  /* arg table */
    movs    r7, r1  /* arg size (also set the condition code flags so that we detect if there are no arguments) */
    mov     r4, r2  /* function address */

    mov     r0, r3          /* objlast. might get overwritten */
    mov     r5, #0          /* This will hold an offset of #4 only if objlast couldnt be placed into an "r" register */

    /* Load float and double args into d0-d7 and s0-s15 (r10 holds pointer to first float value) */
    add     r10, r6, #272   /* r10 (r6 + 272) points to the first value for the VFP registers */
    mov     r8, #0
    vldmia.64 r10, {d0-d7}  /* Load contents starting at r10 into registers d0-d7 */

    /* If there are no arguments to set into r0-r3 */
    /* go check if there are arguments for the stack */
    beq     stackargsFuncObjLast

    mov     r5, r3          /* store objlast in r5 temporarily */

    /* Load the first 4 arguments into r0-r3 */
    cmp     r7, #4
    ldrge   r0, [r6]
    cmp     r7, #8
    ldrge   r1, [r6,#4]
    movlt   r1, r5
    cmp     r7, #12
    ldrge   r2, [r6,#8]
    movlt   r2, r5
    cmp     r7, #16
    ldrge   r3, [r6,#12]
    movlt   r3, r5
    movlt   r5, #0                  /* If objlast got placed into a register, r5 = 0 */
    blt     stackargsFuncObjLast    /* If objlast got placed into a register, go to stackargsFuncObjLast */

    str     r5, [r6, #12]           /* Put objlast in r6 + 12 */
    mov     r5, #4                  /* Set r5 with an offset of #4, so objlast can be loaded into the stack */

stackargsFuncObjLast:
    ldr     r7, [r6, #268]  /* Load stack size into r7 */
    add     r7, r7, r5      /* Add the offset placed in r5 (could be #0 or #4) */
    cmp     r7, #0          /* Check for 0 args */

    /* If there are no args for the stack, branch */
    beq     nomoreargsarmFuncObjLast

    /* Load the rest of the arguments onto the stack */
    /* Ensure 8-byte stack alignment */
    mov     r8, sp          /* remember the sp we started from */
    sub     sp, sp, r7
    add     r6, r6, #16     /* Set r6 to point to the first arg to be placed on the stack */

    sub     r12, sp, #8
    sub     r6, r6, r5      /* r6 = r6 - r5 (r5 can be #0 or #4) */
    bic     r12, r12, #7    /* thumb mode couldn't support "bic  sp, sp, #7" instruction */
    sub     r8, r8, r12     /* r8 = total frame size, added back to sp after the call */
    mov     sp, r12         /* sp = aligned frame base; r12(ip) is not callee saved register */

    /* Copy through r12 and leave sp at the frame base. Using sp itself as a
       post-incrementing cursor would leave the words already copied below sp,
       where an asynchronous signal frame could overwrite them. */
stackargslooparmFuncObjLast:
    ldr     r5, [r6], #4
    subs    r7, r7, #4
    str     r5, [r12], #4
    bne     stackargslooparmFuncObjLast

nomoreargsarmFuncObjLast:
#if defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_4__)
    mov    lr, pc   /* older ARM didn't support blx */
    mov    pc, r4
#else
    blx     r4
#endif
    add     sp, sp, r8      /* release the outgoing-argument frame (r8 is 0 if none was made) */
    vstmia.64   r10, {d0-d7}    /* Copy contents of registers d0-d7 to the address stored in r10 */

    pop   {r4-r8, r10,r11, pc}

/* ------------------------------------------------------------------------------------------- */
    .align 2        /* Align the function code to a 4-byte (2^n) word boundary. */
#if defined(__thumb__) || defined(__thumb2__)
    .thumb
    .syntax unified
#else
    .arm            /* Use ARM instructions instead of Thumb.*/
#endif
    .globl armFuncR0ObjLast     /* Make the function globally accessible.*/
/*
 * armFuncR0ObjLast (VFP register argument passing)
 *
 * Calls the target with r0 forced to a caller-supplied value, the table's
 * core-register words following in r1-r3, and "objlast" passed after them.
 *
 * In:   r0 = pointer to the argument table
 *       r1 = number of bytes of core-register arguments in the table
 *       r2 = address of the function to call
 *       r3 = value to pass in r0
 *       first stack word on entry = objlast
 * Out:  r0/r1 are left exactly as the callee returned them; d0-d7 as left by
 *       the callee are written back to the table at offset 272.
 *
 * Table layout as this routine uses it (byte offsets from r0):
 *     0 ..  15   core-register words (three fit in r1-r3)
 *    16 ..       words to copy to the stack
 *   268          number of bytes to copy to the stack
 *   272 .. 335   values for d0-d7 (overwritten with d0-d7 after the call)
 *
 * When objlast (and possibly a fourth table word) does not fit in r1-r3, the
 * table slots at offsets 8 and 12 are rewritten so that the overflow words sit
 * directly in front of the stack words, and the copy starts 4 or 8 bytes
 * early. Note that this modifies the caller's table.
 *
 * Register roles: r4 = callee address, r5 = objlast, then extra-byte count,
 * then copy temp; r6 = table / read cursor, r7 = bytes left (also a temp
 * while shuffling the table), r8 = outgoing frame size,
 * r10 = address of the VFP area, r12 = frame base, then write cursor.
 */
armFuncR0ObjLast:
    push    {r4-r8, r10, r11, lr}

    ldr     r5, [sp,#32]   /* objlast to temp reg (it sits just above the eight registers saved above) */

    mov     r6, r0  /* arg table */
    movs    r7, r1  /* arg size (also set the condition code flags so that we detect if there are no arguments) */
    mov     r4, r2  /* function address */

    mov     r0, r3      /* r0 explicitly set */
    mov     r1, r5      /* objlast.  might get overwritten */
    mov     r5, #0      /* This will hold an offset of #4 or #8 if objlast or one arg couldnt be placed into an "r" register */

    /* Load float and double args into d0-d7 and s0-s15 (r10 holds pointer to first float value) */
    add     r10, r6, #272   /* r10 (r6 + 272) points to the first value for the VFP registers */
    mov     r8, #0
    vldmia.64 r10, {d0-d7}  /* Load contents starting at r10 into registers d0-d7 */

    /* If there are no arguments to set into r0-r3 */
    /* go check if there are arguments for the stack */
    beq     stackargsFuncR0ObjLast

    mov     r5, r1          /* store objlast in r5 temporarily */

    /* Load the first 3 arguments into r1-r3 */
    cmp     r7, #4
    ldrge   r1, [r6]
    cmp     r7, #8
    ldrge   r2, [r6,#4]
    movlt   r2, r5
    cmp     r7, #12
    ldrge   r3, [r6,#8]
    movlt   r3, r5
    movlt   r5, #0                  /* If objlast got placed into a register, r5 = 0 */
    blt     stackargsFuncR0ObjLast  /* If objlast got placed into a register, go to stackargsFuncR0ObjLast */

    cmp     r7, #16                 /* Else if we have one last arg set the offset accordingly and store the arg in the array */
    ldrge   r7, [r6, #12]
    strge   r7, [r6, #8]

    str     r5, [r6, #12]           /* Put objlast in r6 + 12 */
    mov     r5, #0                  /* the flags from the cmp above are still live for the movge below */

    movge   r5, #4                  /* Set r5 with an offset of #4 if theres one last arg that couldnt be placed in r registers */
    add     r5, r5, #4              /* Set r5 with an offset of + #4, so objlast can be loaded into the stack */

stackargsFuncR0ObjLast:
    ldr     r7, [r6, #268]  /* Load stack size into r7 */
    add     r7, r7, r5      /* Add the offset placed in r5 (could be #0, #4 or #8) */
    cmp     r7, #0          /* Check for 0 args */

    /* If there are no args for the stack, branch */
    beq     nomoreargsarmFuncR0ObjLast

    /* Load the rest of the arguments onto the stack */
    /* Ensure 8-byte stack alignment */
    mov     r8, sp          /* remember the sp we started from */
    sub     sp, sp, r7
    add     r6, r6, #16     /* Set r6 to point to the first arg to be placed on the stack */

    sub     r12, sp, #8
    sub     r6, r6, r5      /* r6 = r6 - r5 (r5 can be #0, #4 or #8) */
    bic     r12, r12, #7    /* thumb mode couldn't support "bic  sp, sp, #7" instruction */
    sub     r8, r8, r12     /* r8 = total frame size, added back to sp after the call */
    mov     sp, r12         /* sp = aligned frame base; r12(ip) is not callee saved register */

    /* Copy through r12 and leave sp at the frame base. Using sp itself as a
       post-incrementing cursor would leave the words already copied below sp,
       where an asynchronous signal frame could overwrite them. */
stackargslooparmFuncR0ObjLast:
    ldr     r5, [r6], #4
    subs    r7, r7, #4
    str     r5, [r12], #4
    bne     stackargslooparmFuncR0ObjLast

nomoreargsarmFuncR0ObjLast:
#if defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_4__)
    mov    lr, pc   /* older ARM didn't support blx */
    mov    pc, r4
#else
    blx     r4
#endif
    add     sp, sp, r8      /* release the outgoing-argument frame (r8 is 0 if none was made) */
    vstmia.64   r10, {d0-d7}    /* Copy contents of registers d0-d7 to the address stored in r10 */

    pop {r4-r8, r10, r11, pc}

/* ------------------------------------------------------------------------------------------- */
    .align 2        /* Align the function code to a 4-byte (2^n) word boundary. */
#if defined(__thumb__) || defined(__thumb2__)
    .thumb
    .syntax unified
#else
    .arm            /* Use ARM instructions instead of Thumb.*/
#endif
    .globl armFuncR0        /* Make the function globally accessible.*/
/*
 * armFuncR0 (VFP register argument passing)
 *
 * Calls the target with r0 forced to a caller-supplied value and the table's
 * core-register words following in r1-r3.
 *
 * In:   r0 = pointer to the argument table
 *       r1 = number of bytes of core-register arguments in the table
 *       r2 = address of the function to call
 *       r3 = value to pass in r0
 * Out:  r0/r1 are left exactly as the callee returned them; d0-d7 as left by
 *       the callee are written back to the table at offset 272.
 *
 * Table layout as this routine uses it (byte offsets from r0):
 *     0 ..  15   core-register words (three fit in r1-r3)
 *    16 ..       words to copy to the stack
 *   268          number of bytes to copy to the stack
 *   272 .. 335   values for d0-d7 (overwritten with d0-d7 after the call)
 *
 * If the table holds a fourth register word, it no longer fits in a register,
 * so the stack copy starts 4 bytes early (at offset 12) to pick it up.
 *
 * Register roles: r4 = callee address, r5 = copy temp, r6 = table / read
 * cursor, r7 = bytes left, r8 = outgoing frame size, r10 = address of the
 * VFP area, r11 = extra bytes to copy (0 or 4), r12 = frame base, then
 * write cursor.
 */
armFuncR0:
    push {r4-r8, r10, r11, lr}

    mov     r6, r0  /* arg table */
    movs    r7, r1  /* arg size (also set the condition code flags so that we detect if there are no arguments) */
    mov     r4, r2  /* function address */
    mov     r11, #0 /* This will hold an offset of #4 only if the last arg that should have been placed into an "r" reg needs to go to the stack */
    mov     r0, r3  /* r0 explicitly set */

    /* Load float and double args into d0-d7 and s0-s15 (r10 holds pointer to first float value) */
    add     r10, r6, #272   /* r10 (r6 + 272) points to the first value for the VFP registers */
    mov     r8, #0
    vldmia.64 r10, {d0-d7}  /* Load contents starting at r10 into registers d0-d7 */

    /* If there are no arguments to set into r0-r3 */
    /* go check if there are arguments for the stack */
    beq     stackargsarmFuncR0

    /* Load the first 3 arguments into r1-r3 */
    cmp     r7, #4
    ldrge   r1, [r6]
    cmp     r7, #8
    ldrge   r2, [r6, #4]
    cmp     r7, #12
    ldrge   r3, [r6, #8]
    cmp     r7, #16
    movge   r11, #4         /* If there is still one arg to be placed, set the offset in r11 to #4 */

stackargsarmFuncR0:
    ldr     r5, [r6, #268]  /* Load stack size into r5 */
    add     r5, r11         /* Add the offset placed in r11 (could be #0 or #4) */
    movs    r7, r5          /* Load stack size into r7, checking for 0 args */

    /* If there are no args for the stack, branch */
    beq     nomoreargsarmFuncR0

    /* Load the rest of the arguments onto the stack */
    /* Ensure 8-byte stack alignment */
    mov     r8, sp          /* remember the sp we started from */
    sub     sp, sp, r7
    add     r6, r6, #16     /* Set r6 to point to the first arg to be placed on the stack */

    sub     r12, sp, #8
    sub     r6, r6, r11     /* r6 = r6 - r11 (r11 can be #0 or #4) */
    bic     r12, r12, #7    /* thumb mode couldn't support "bic  sp, sp, #7" instruction */
    sub     r8, r8, r12     /* r8 = total frame size, added back to sp after the call */
    mov     sp, r12         /* sp = aligned frame base; r12(ip) is not callee saved register */

    /* Copy through r12 and leave sp at the frame base. Using sp itself as a
       post-incrementing cursor would leave the words already copied below sp,
       where an asynchronous signal frame could overwrite them. */
stackargslooparmFuncR0:
    ldr     r5, [r6], #4
    subs    r7, r7, #4
    str     r5, [r12], #4
    bne     stackargslooparmFuncR0

nomoreargsarmFuncR0:
#if defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_4__)
    mov    lr, pc   /* older ARM didn't support blx */
    mov    pc, r4
#else
    blx     r4
#endif
    add     sp, sp, r8      /* release the outgoing-argument frame (r8 is 0 if none was made) */
    vstmia.64   r10, {d0-d7}    /* Copy contents of registers d0-d7 to the address stored in r10 */

    pop {r4-r8, r10, r11, pc}

/* ------------------------------------------------------------------------------------------- */
    .align 2        /* Align the function code to a 4-byte (2^n) word boundary. */
#if defined(__thumb__) || defined(__thumb2__)
    .thumb
    .syntax unified
#else
    .arm            /* Use ARM instructions instead of Thumb.*/
#endif
    .globl armFuncR0R1      /* Make the function globally accessible.*/
/*
 * armFuncR0R1 (VFP register argument passing)
 *
 * Calls the target with r0 and r1 forced to caller-supplied values and the
 * table's core-register words following in r2-r3.
 *
 * In:   r0 = pointer to the argument table
 *       r1 = number of bytes of core-register arguments in the table
 *       r2 = address of the function to call
 *       r3 = value to pass in r0
 *       first stack word on entry = value to pass in r1
 * Out:  r0/r1 are left exactly as the callee returned them; d0-d7 as left by
 *       the callee are written back to the table at offset 272.
 *
 * Table layout as this routine uses it (byte offsets from r0):
 *     0 ..  15   core-register words (two fit in r2-r3)
 *    16 ..       words to copy to the stack
 *   268          number of bytes to copy to the stack
 *   272 .. 335   values for d0-d7 (overwritten with d0-d7 after the call)
 *
 * A third and fourth register word no longer fit in registers. They are
 * picked up by starting the stack copy 4 or 8 bytes early; a lone third word
 * is first moved from offset 8 to offset 12 so it sits directly in front of
 * the stack words. Note that this modifies the caller's table.
 *
 * Register roles: r4 = callee address, r5 = copy temp, r6 = table / read
 * cursor, r7 = bytes left (also a temp while shuffling the table),
 * r8 = outgoing frame size, r10 = address of the VFP area,
 * r11 = extra bytes to copy (0, 4 or 8), r12 = frame base, then write cursor.
 */
armFuncR0R1:
    push {r4-r8, r10, r11, lr}

    mov     r6, r0  /* arg table */
    movs    r7, r1  /* arg size (also set the condition code flags so that we detect if there are no arguments) */
    mov     r4, r2  /* function address */
    mov     r11, #0 /* This will hold an offset of #4 or #8 only if the last arg (or last 2 args) that should have been placed into "r" regs need to go to the stack */

    mov     r0, r3          /* r0 explicitly set */
    ldr     r1, [sp, #32]   /* r1 explicitly set too (read from just above the eight saved registers) */

    /* Load float and double args into d0-d7 and s0-s15 (r10 holds pointer to first float value) */
    add     r10, r6, #272   /* r10 (r6 + 272) points to the first value for the VFP registers */
    mov     r8, #0
    vldmia.64 r10, {d0-d7}  /* Load contents starting at r10 into registers d0-d7 */

    /* If there are no arguments to set into r2-r3 */
    /* go check if there are arguments for the stack */
    beq     stackargsarmFuncR0R1

    /* Load the first 2 arguments into r2-r3 */
    cmp     r7, #4
    ldrge   r2, [r6]
    cmp     r7, #8
    ldrge   r3, [r6, #4]
    cmp     r7, #12
    movge   r11, #4         /* If there is a third arg to be placed, set the offset in r11 to #4 */
    cmp     r7, #16
    movge   r11, #8         /* If there is a fourth arg to be placed, set the offset in r11 to #8 */
    ldrlt   r7, [r6, #8]    /* Else copy the third arg to the correct place in the array */
    strlt   r7, [r6, #12]   /* (also runs when there is no third arg; the slot is then never copied) */

stackargsarmFuncR0R1:
    ldr     r5, [r6, #268]  /* Load stack size into r5 */
    add     r5, r11         /* Add the offset placed in r11 (could be #0 or #4 or #8) */
    movs    r7, r5          /* Load stack size into r7, checking for 0 args */

    /* If there are no args for the stack, branch */
    beq     nomoreargsarmFuncR0R1

    /* Load the rest of the arguments onto the stack */
    /* Ensure 8-byte stack alignment */
    mov     r8, sp          /* remember the sp we started from */
    sub     sp, sp, r7
    add     r6, r6, #16     /* Set r6 to point to the first arg to be placed on the stack */

    sub     r12, sp, #8
    sub     r6, r6, r11     /* r6 = r6 - r11 (r11 can be #0 or #4 or #8) */
    bic     r12, r12, #7    /* thumb mode couldn't support "bic  sp, sp, #7" instruction */
    sub     r8, r8, r12     /* r8 = total frame size, added back to sp after the call */
    mov     sp, r12         /* sp = aligned frame base; r12(ip) is not callee saved register */

    /* Copy through r12 and leave sp at the frame base. Using sp itself as a
       post-incrementing cursor would leave the words already copied below sp,
       where an asynchronous signal frame could overwrite them. */
stackargslooparmFuncR0R1:
    ldr     r5, [r6], #4
    subs    r7, r7, #4
    str     r5, [r12], #4
    bne     stackargslooparmFuncR0R1

nomoreargsarmFuncR0R1:
#if defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_4__)
    mov    lr, pc   /* older ARM didn't support blx */
    mov    pc, r4
#else
    blx     r4
#endif
    add     sp, sp, r8      /* release the outgoing-argument frame (r8 is 0 if none was made) */
    vstmia.64   r10, {d0-d7}    /* Copy contents of registers d0-d7 to the address stored in r10 */

    pop {r4-r8, r10, r11, pc}

#endif /* hard float abi */

#endif /* arm */

#if defined(__linux__) && defined(__ELF__)
/* ref: http://hardened.gentoo.org/gnu-stack.xml 
   ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart */
.section .note.GNU-stack,"",%progbits
#endif

#endif /* !AS_MAX_PORTABILITY */