File: float64_sse4.s

package info (click to toggle)
golang-github-apache-arrow-go 18.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 32,200 kB
  • sloc: asm: 477,547; ansic: 5,369; cpp: 759; sh: 585; makefile: 319; python: 190; sed: 5
file content (103 lines) | stat: -rw-r--r-- 2,052 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
	.text
	.intel_syntax noprefix
	.file	"_lib/float64.c"
	.globl	sum_float64_sse4
	.p2align	4, 0x90
	.type	sum_float64_sse4,@function
sum_float64_sse4:                       # @sum_float64_sse4
# BB#0:
	push	rbp
	mov	rbp, rsp
	and	rsp, -8
	xorpd	xmm0, xmm0
	test	rsi, rsi
	je	.LBB0_14
# BB#1:
	cmp	rsi, 3
	jbe	.LBB0_2
# BB#5:
	mov	r9, rsi
	and	r9, -4
	je	.LBB0_2
# BB#6:
	lea	r8, [r9 - 4]
	mov	eax, r8d
	shr	eax, 2
	inc	eax
	and	rax, 3
	je	.LBB0_7
# BB#8:
	neg	rax
	xorpd	xmm0, xmm0
	xor	ecx, ecx
	xorpd	xmm1, xmm1
	.p2align	4, 0x90
.LBB0_9:                                # =>This Inner Loop Header: Depth=1
	movupd	xmm2, xmmword ptr [rdi + 8*rcx]
	movupd	xmm3, xmmword ptr [rdi + 8*rcx + 16]
	addpd	xmm0, xmm2
	addpd	xmm1, xmm3
	add	rcx, 4
	inc	rax
	jne	.LBB0_9
	jmp	.LBB0_10
.LBB0_2:
	xor	r9d, r9d
.LBB0_3:
	lea	rax, [rdi + 8*r9]
	sub	rsi, r9
	.p2align	4, 0x90
.LBB0_4:                                # =>This Inner Loop Header: Depth=1
	addsd	xmm0, qword ptr [rax]
	add	rax, 8
	dec	rsi
	jne	.LBB0_4
.LBB0_14:
	movsd	qword ptr [rdx], xmm0
	mov	rsp, rbp
	pop	rbp
	ret
.LBB0_7:
	xor	ecx, ecx
	xorpd	xmm0, xmm0
	xorpd	xmm1, xmm1
.LBB0_10:
	cmp	r8, 12
	jb	.LBB0_13
# BB#11:
	mov	rax, r9
	sub	rax, rcx
	lea	rcx, [rdi + 8*rcx + 112]
	.p2align	4, 0x90
.LBB0_12:                               # =>This Inner Loop Header: Depth=1
	movupd	xmm2, xmmword ptr [rcx - 112]
	movupd	xmm3, xmmword ptr [rcx - 96]
	movupd	xmm4, xmmword ptr [rcx - 80]
	movupd	xmm5, xmmword ptr [rcx - 64]
	addpd	xmm2, xmm0
	addpd	xmm3, xmm1
	movupd	xmm6, xmmword ptr [rcx - 48]
	movupd	xmm7, xmmword ptr [rcx - 32]
	addpd	xmm6, xmm4
	addpd	xmm6, xmm2
	addpd	xmm7, xmm5
	addpd	xmm7, xmm3
	movupd	xmm0, xmmword ptr [rcx - 16]
	movupd	xmm1, xmmword ptr [rcx]
	addpd	xmm0, xmm6
	addpd	xmm1, xmm7
	sub	rcx, -128
	add	rax, -16
	jne	.LBB0_12
.LBB0_13:
	addpd	xmm0, xmm1
	haddpd	xmm0, xmm0
	cmp	r9, rsi
	jne	.LBB0_3
	jmp	.LBB0_14
.Lfunc_end0:
	.size	sum_float64_sse4, .Lfunc_end0-sum_float64_sse4


	.ident	"Apple LLVM version 9.0.0 (clang-900.0.39.2)"
	.section	".note.GNU-stack","",@progbits