1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
|
//+build !noasm !appengine
// ARROW-15172:
// (C2GOASM doesn't work correctly for Arm64)
// Partly GENERATED BY asm2plan9s.
// func _sum_float64_neon(buf unsafe.Pointer, len uintptr, res unsafe.Pointer)
//
// Adds up the len float64 values stored contiguously at buf and writes the
// total to the float64 that res points to. A len of zero stores +0.0 and
// never dereferences buf.
//
// Frame: $0-24 (no locals; three 8-byte arguments, no return value).
// NEON instructions are emitted as raw WORD encodings; the trailing comment on
// each WORD is the A64 instruction it encodes (xN there is Go's RN).
//
// Register roles:
//   R0  = buf; after the block loop it points at the first leftover element
//   R1  = len (element count)
//   R2  = res
//   R8  = len % 8, elements left for the scalar tail loop
//   R9  = len / 8, number of 8-element blocks still to process
//   R10 = len rounded down to a multiple of 8
//   R11 = read cursor of the block loop
//   V0  = running sum (two lanes until reduced, then scalar in d0)
//   V1-V4 = the eight values of the current block
//
// Summation order: within the block loop lane 0 accumulates the even-indexed
// elements and lane 1 the odd-indexed ones; the two lanes are then added
// together and the tail elements are added one by one. Rounding can therefore
// differ from a strict left-to-right scalar sum.
TEXT ·_sum_float64_neon(SB), $0-24
	MOVD buf+0(FP), R0
	MOVD len+8(FP), R1
	MOVD res+16(FP), R2

	WORD $0xd343fc29 // lsr x9, x1, #3                  ; blocks = len / 8
	WORD $0x92400828 // and x8, x1, #0x7                ; tail   = len % 8
	WORD $0x6f00e400 // movi v0.2d, #0000000000000000   ; clear both accumulator lanes
	CBZ  R9, reduce  // fewer than 8 elements: nothing for the block loop to do

	WORD $0x927df02a // and x10, x1, #0xfffffffffffffff8 ; elements covered by whole blocks
	WORD $0xaa0003eb // mov x11, x0                      ; block cursor, R0 stays at buf

block_loop:
	// One iteration consumes 64 bytes (8 float64) and post-increments the cursor.
	WORD $0x4cdf2d61 // ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x11], #64
	WORD $0xf1000529 // subs x9, x9, #1                  ; sets the flags tested by BNE below
	WORD $0x4e61d400 // fadd v0.2d, v0.2d, v1.2d
	WORD $0x4e60d440 // fadd v0.2d, v2.2d, v0.2d
	WORD $0x4e60d460 // fadd v0.2d, v3.2d, v0.2d
	WORD $0x4e60d480 // fadd v0.2d, v4.2d, v0.2d
	BNE  block_loop  // the fadd instructions above leave the subs flags intact

	// Step R0 past the elements already summed: R10 elements * 8 bytes each.
	WORD $0x8b0a0c00 // add x0, x0, x10, lsl #3

reduce:
	// Fold the two lanes into the scalar d0. When the block loop was skipped
	// both lanes are still zero, so this yields +0.0.
	WORD $0x7e70d800 // faddp d0, v0.2d
	CBZ  R8, store   // no leftover elements

tail_loop:
	WORD $0xfc408401 // ldr d1, [x0], #8                 ; next leftover element
	WORD $0xf1000508 // subs x8, x8, #1
	WORD $0x1e612800 // fadd d0, d0, d1
	BNE  tail_loop

store:
	WORD $0xfd000040 // str d0, [x2]                     ; *res = sum
	RET
|