File: float64_sse4_amd64.s

package info (click to toggle)
golang-github-apache-arrow-go 18.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 32,200 kB
  • sloc: asm: 477,547; ansic: 5,369; cpp: 759; sh: 585; makefile: 319; python: 190; sed: 5
file content (94 lines) | stat: -rw-r--r-- 3,237 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// _sum_float64_sse4 adds up `len` float64 values starting at `buf` and stores
// the total through `res`.
//
// Arguments (24 bytes of arguments, no local frame):
//   buf+0(FP)  -> DI (rdi)  address of the first float64
//   len+8(FP)  -> SI (rsi)  number of elements
//   res+16(FP) -> DX (rdx)  address that receives the float64 sum
//
// The body is emitted as raw opcode bytes (LONG/WORD/BYTE); the trailing
// comment on each such line is the Intel-syntax instruction it encodes.
//
// Register roles:
//   xmm0, xmm1  packed accumulators (two float64 lanes each)
//   xmm2-xmm7   loaded data / partial sums inside the unrolled loop
//   r9          len rounded down to a multiple of 4 (elements summed by the packed loops)
//   r8          r9 - 4
//   rax, rcx    loop counters and cursors (meaning changes per block, see below)
//
// Control flow:
//   LBB0_9   peel loop, 4 elements per pass, run (groups mod 4) times
//   LBB0_12  unrolled loop, 16 elements per pass
//   LBB0_13  fold xmm0/xmm1 down to one scalar
//   LBB0_3/4 scalar tail for the elements the packed loops did not cover
//   LBB0_14  store the result and return
//
// The packed paths accumulate in independent lanes, so elements are not added
// in strict left-to-right order; rounding can therefore differ from a simple
// sequential sum.
//
// The first character of the symbol name must be the middle dot (U+00B7) that
// the Go assembler uses for package-qualified names.
//
// NOTE(review): the `+build` line at the top of this file separates `!noasm`
// and `!appengine` with a space, which the legacy constraint syntax evaluates
// as OR - confirm that is what the package intends.
TEXT ·_sum_float64_sse4(SB), $0-24

	MOVQ buf+0(FP), DI
	MOVQ len+8(FP), SI
	MOVQ res+16(FP), DX

	// sum = 0.0; an empty input stores that zero and returns.
	LONG $0xc0570f66         // xorpd    xmm0, xmm0
	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
	JE   LBB0_14
	// len <= 3: nothing for the packed loops to do, go straight to the scalar loop.
	LONG $0x03fe8348         // cmp    rsi, 3
	JBE  LBB0_2
	// r9 = len & ^3: how many elements the packed loops will consume.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	LONG $0xfce18349         // and    r9, -4
	JE   LBB0_2
	// r8 = r9 - 4, so (r8 >> 2) + 1 is the number of 4-element groups.
	// rax keeps only the low two bits of that count: the number of groups the
	// peel loop must take so that what remains is a multiple of 16 elements.
	LONG $0xfc418d4d         // lea    r8, [r9 - 4]
	WORD $0x8944; BYTE $0xc0 // mov    eax, r8d
	WORD $0xe8c1; BYTE $0x02 // shr    eax, 2
	WORD $0xc0ff             // inc    eax
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_7
	// rax = -(groups mod 4): the peel loop counts it up to zero.
	// rcx = element index, xmm0/xmm1 = cleared accumulators.
	WORD $0xf748; BYTE $0xd8 // neg    rax
	LONG $0xc0570f66         // xorpd    xmm0, xmm0
	WORD $0xc931             // xor    ecx, ecx
	LONG $0xc9570f66         // xorpd    xmm1, xmm1

// Peel loop: 4 elements per pass.
// xmm0 += buf[rcx], buf[rcx+1]; xmm1 += buf[rcx+2], buf[rcx+3].
LBB0_9:
	LONG $0x14100f66; BYTE $0xcf   // movupd    xmm2, oword [rdi + 8*rcx]
	LONG $0x5c100f66; WORD $0x10cf // movupd    xmm3, oword [rdi + 8*rcx + 16]
	LONG $0xc2580f66               // addpd    xmm0, xmm2
	LONG $0xcb580f66               // addpd    xmm1, xmm3
	LONG $0x04c18348               // add    rcx, 4
	WORD $0xff48; BYTE $0xc0       // inc    rax
	JNE  LBB0_9
	JMP  LBB0_10

// Scalar-only entry: no element has been consumed by packed code, so r9 = 0.
LBB0_2:
	WORD $0x3145; BYTE $0xc9 // xor    r9d, r9d

// Scalar tail setup: rax = &buf[r9], rsi = len - r9 elements still to add.
// Every path into here leaves at least one element, which the bottom-tested
// loop below relies on.
LBB0_3:
	LONG $0xcf048d4a         // lea    rax, [rdi + 8*r9]
	WORD $0x294c; BYTE $0xce // sub    rsi, r9

// Scalar loop: add one float64 at a time into the low lane of xmm0.
LBB0_4:
	LONG $0x00580ff2         // addsd    xmm0, qword [rax]
	LONG $0x08c08348         // add    rax, 8
	WORD $0xff48; BYTE $0xce // dec    rsi
	JNE  LBB0_4

// Write the low lane of xmm0 to *res and return.
LBB0_14:
	LONG $0x02110ff2 // movsd    qword [rdx], xmm0
	RET

// No peel pass needed (group count is a multiple of 4): start the unrolled
// loop at element 0 with cleared accumulators.
LBB0_7:
	WORD $0xc931     // xor    ecx, ecx
	LONG $0xc0570f66 // xorpd    xmm0, xmm0
	LONG $0xc9570f66 // xorpd    xmm1, xmm1

// r8 < 12 means fewer than four 4-element groups exist in total, and the peel
// loop has already consumed all of them; skip the unrolled loop.
// Otherwise rax = r9 - rcx = elements left for the unrolled loop (a multiple
// of 16), and rcx becomes a byte pointer biased by +112 so the eight loads in
// the loop body use displacements -112 .. 0.
LBB0_10:
	LONG $0x0cf88349             // cmp    r8, 12
	JB   LBB0_13
	WORD $0x894c; BYTE $0xc8     // mov    rax, r9
	WORD $0x2948; BYTE $0xc8     // sub    rax, rcx
	LONG $0xcf4c8d48; BYTE $0x70 // lea    rcx, [rdi + 8*rcx + 112]

// Unrolled loop: 16 elements (128 bytes) per pass. The 1st/3rd/5th/7th
// 16-byte loads are folded into xmm0, the 2nd/4th/6th/8th into xmm1.
LBB0_12:
	LONG $0x51100f66; BYTE $0x90 // movupd    xmm2, oword [rcx - 112]
	LONG $0x59100f66; BYTE $0xa0 // movupd    xmm3, oword [rcx - 96]
	LONG $0x61100f66; BYTE $0xb0 // movupd    xmm4, oword [rcx - 80]
	LONG $0x69100f66; BYTE $0xc0 // movupd    xmm5, oword [rcx - 64]
	LONG $0xd0580f66             // addpd    xmm2, xmm0
	LONG $0xd9580f66             // addpd    xmm3, xmm1
	LONG $0x71100f66; BYTE $0xd0 // movupd    xmm6, oword [rcx - 48]
	LONG $0x79100f66; BYTE $0xe0 // movupd    xmm7, oword [rcx - 32]
	LONG $0xf4580f66             // addpd    xmm6, xmm4
	LONG $0xf2580f66             // addpd    xmm6, xmm2
	LONG $0xfd580f66             // addpd    xmm7, xmm5
	LONG $0xfb580f66             // addpd    xmm7, xmm3
	LONG $0x41100f66; BYTE $0xf0 // movupd    xmm0, oword [rcx - 16]
	LONG $0x09100f66             // movupd    xmm1, oword [rcx]
	LONG $0xc6580f66             // addpd    xmm0, xmm6
	LONG $0xcf580f66             // addpd    xmm1, xmm7
	// Advance the pointer by 128 bytes and the remaining count by 16 elements.
	LONG $0x80e98348             // sub    rcx, -128
	LONG $0xf0c08348             // add    rax, -16
	JNE  LBB0_12

// Reduce: merge the two accumulators, then add the two lanes so the low lane
// of xmm0 holds the packed total. If r9 != len, the 1-3 elements past r9 are
// added by the scalar tail; otherwise store and return.
LBB0_13:
	LONG $0xc1580f66         // addpd    xmm0, xmm1
	LONG $0xc07c0f66         // haddpd    xmm0, xmm0
	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
	JNE  LBB0_3
	JMP  LBB0_14