File: memory_sse4.s

package info (click to toggle)
golang-github-apache-arrow-go 18.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 32,200 kB
  • sloc: asm: 477,547; ansic: 5,369; cpp: 759; sh: 585; makefile: 319; python: 190; sed: 5
file content (96 lines) | stat: -rw-r--r-- 2,048 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
	.text
	.intel_syntax noprefix
	.file	"_lib/memory.c"
	.globl	memset_sse4
	.p2align	4, 0x90
	.type	memset_sse4,@function
#-----------------------------------------------------------------------
# memset_sse4 -- store the byte in dl into rsi consecutive bytes at rdi.
#
# Syntax: GAS with Intel operand order (dst, src), no register prefixes.
# In (registers the code reads without writing them first; they are the
# first three integer argument registers of the System V AMD64 ABI):
#   rdi = address of the first byte to write
#   rsi = number of bytes to write
#   dl  = fill value
# Out:   no result is prepared for the caller; rax is used as scratch.
# Clobb: rax, rcx, rdi, r8, r9, r10, r11, xmm0, xmm1, flags
# Stack: rbp is pushed and restored; no locals, no calls.
# NOTE(review): the C prototype lives in _lib/memory.c, which is not
# visible here -- confirm the argument types against it.
#
# Outline:
#   - nothing is written when rdi + rsi <= rdi (rsi == 0, or the end
#     address wraps around);
#   - rsi < 32: every byte is written by the byte loop at .LBB0_12;
#   - otherwise the fill byte is broadcast into xmm0 and the leading
#     (rsi & -32) bytes are written with unaligned 16-byte stores: first
#     a loop that writes one 32-byte chunk per pass (chunk count mod 8
#     passes), then a loop that writes 256 bytes per pass. The trailing
#     rsi mod 32 bytes, if any, go through the byte loop.
#-----------------------------------------------------------------------
memset_sse4:                            # @memset_sse4
# BB#0:
# Frame setup and the empty-range check.
	push	rbp
	mov	rbp, rsp
	and	rsp, -8	# round rsp down to a multiple of 8; restored from rbp at exit
	lea	r11, [rdi + rsi]	# r11 = end address (one past the last byte to write)
	cmp	r11, rdi
	jbe	.LBB0_13	# unsigned end <= start: nothing to write
# BB#1:
	cmp	rsi, 32
	jb	.LBB0_12	# fewer than 32 bytes: byte loop only, rdi still at the start
# BB#2:
# Both r8 and r10 receive rsi rounded down to a multiple of 32.
	mov	r8, rsi
	and	r8, -32
	mov	r10, rsi
	and	r10, -32
	je	.LBB0_12	# taken only if r10 == 0, which cannot happen with rsi >= 32
# BB#3:
# Broadcast the fill byte and work out how many single-chunk passes are
# needed before the 256-byte loop.
	movzx	eax, dl
	movd	xmm0, eax	# fill value in byte 0 of xmm0
	pxor	xmm1, xmm1	# all-zero shuffle control
	pshufb	xmm0, xmm1	# every lane selects byte 0: xmm0 = 16 copies of dl
	lea	r9, [r10 - 32]	# r9 = offset of the last 32-byte chunk
	mov	ecx, r9d
	shr	ecx, 5
	inc	ecx	# (r9d >> 5) + 1; its low 3 bits match the chunk count r10 / 32
	and	rcx, 7	# rcx = chunk count mod 8
	je	.LBB0_4	# chunk count is a multiple of 8: skip the single-chunk loop
# BB#5:
	neg	rcx	# count upwards from -(chunk count mod 8) to 0
	xor	eax, eax	# rax = byte offset from rdi
	.p2align	4, 0x90
.LBB0_6:                                # =>This Inner Loop Header: Depth=1
# One 32-byte chunk per pass.
	movdqu	xmmword ptr [rdi + rax], xmm0
	movdqu	xmmword ptr [rdi + rax + 16], xmm0
	add	rax, 32
	inc	rcx
	jne	.LBB0_6
	jmp	.LBB0_7
.LBB0_4:
	xor	eax, eax	# no single-chunk passes ran: offset starts at 0
.LBB0_7:
# Here rax = number of bytes already written by the single-chunk loop.
	cmp	r9, 224
	jb	.LBB0_10	# r10 < 256: fewer than 8 chunks, all written above
# BB#8:
	mov	rcx, r10
	sub	rcx, rax	# rcx = bytes left for vector stores; a non-zero multiple of 256
	lea	rax, [rdi + rax + 240]	# bias by 240 so the 16 stores below span offsets -240..0
	.p2align	4, 0x90
.LBB0_9:                                # =>This Inner Loop Header: Depth=1
# 16 stores of 16 bytes each = 256 bytes per pass.
	movdqu	xmmword ptr [rax - 240], xmm0
	movdqu	xmmword ptr [rax - 224], xmm0
	movdqu	xmmword ptr [rax - 208], xmm0
	movdqu	xmmword ptr [rax - 192], xmm0
	movdqu	xmmword ptr [rax - 176], xmm0
	movdqu	xmmword ptr [rax - 160], xmm0
	movdqu	xmmword ptr [rax - 144], xmm0
	movdqu	xmmword ptr [rax - 128], xmm0
	movdqu	xmmword ptr [rax - 112], xmm0
	movdqu	xmmword ptr [rax - 96], xmm0
	movdqu	xmmword ptr [rax - 80], xmm0
	movdqu	xmmword ptr [rax - 64], xmm0
	movdqu	xmmword ptr [rax - 48], xmm0
	movdqu	xmmword ptr [rax - 32], xmm0
	movdqu	xmmword ptr [rax - 16], xmm0
	movdqu	xmmword ptr [rax], xmm0
	add	rax, 256
	add	rcx, -256
	jne	.LBB0_9	# repeat until the remaining byte count reaches 0
.LBB0_10:
	cmp	r10, rsi
	je	.LBB0_13	# rsi was a multiple of 32: no tail bytes remain
# BB#11:
	add	rdi, r8	# move rdi past the prefix written by the vector stores
	.p2align	4, 0x90
.LBB0_12:                               # =>This Inner Loop Header: Depth=1
# Byte-at-a-time loop; always entered with rdi below the end address in r11.
	mov	byte ptr [rdi], dl
	inc	rdi
	cmp	r11, rdi
	jne	.LBB0_12	# continue until rdi reaches the end address
.LBB0_13:
# Common exit: undo the frame set up at entry.
	mov	rsp, rbp
	pop	rbp
	ret
.Lfunc_end0:
	.size	memset_sse4, .Lfunc_end0-memset_sse4


	.ident	"Apple LLVM version 9.0.0 (clang-900.0.39.2)"
	.section	".note.GNU-stack","",@progbits