File: memory_sse4_amd64.s

package info (click to toggle)
golang-github-apache-arrow-go 18.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 32,200 kB
  • sloc: asm: 477,547; ansic: 5,369; cpp: 759; sh: 585; makefile: 319; python: 190; sed: 5
file content (84 lines) | stat: -rw-r--r-- 3,434 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// _memset_sse4 stores the low byte of c into each of the len bytes that
// start at buf.
//
// Arguments are read from the caller's frame ($0-24: no local frame,
// 24 bytes of arguments):
//   buf+0(FP)  -> DI   destination address
//   len+8(FP)  -> SI   number of bytes to fill
//   c+16(FP)   -> DX   fill value; only the low byte (DL) is used
//
// NOTE(review): the Go-side prototype is not part of this file; the three
// 8-byte argument slots suggest (pointer, uintptr, uintptr) -- confirm
// against the Go stub that declares this symbol.
//
// The leading character of the symbol name must be the Unicode middle dot
// (U+00B7); the Go assembler uses it to mark a package-local symbol.
//
// Register roles after the argument loads:
//   R11      buf + len, i.e. one past the last byte to write
//   R8, R10  len rounded down to a multiple of 32 (bytes covered by the
//            16-byte vector stores); both hold the same value
//   R9       R10 - 32
//   xmm0     the fill byte replicated into all 16 byte lanes
//   xmm1     all-zero shuffle mask used to build xmm0
//   AX       byte offset (first loop) / store cursor (second loop)
//   CX       loop counter
//
// Everything except the jumps and RET is emitted as raw opcode bytes via
// BYTE/WORD/LONG/QUAD; the trailing comment on each such line is the
// Intel-syntax disassembly of those bytes.
//
// NOTE(review): pshufb is an SSSE3 instruction; presumably the caller only
// dispatches here after a CPU feature check -- confirm.
TEXT ·_memset_sse4(SB), $0-24

	MOVQ buf+0(FP), DI
	MOVQ len+8(FP), SI
	MOVQ c+16(FP), DX

	// r11 = end pointer. If end <= start (len == 0, or buf+len wrapped
	// around), there is nothing to do.
	LONG $0x371c8d4c             // lea    r11, [rdi + rsi]
	WORD $0x3949; BYTE $0xfb     // cmp    r11, rdi
	JBE  LBB0_13

	// Fewer than 32 bytes: go straight to the byte-at-a-time loop.
	LONG $0x20fe8348             // cmp    rsi, 32
	JB   LBB0_12

	// r8 = r10 = len & -32. Because len >= 32 here, the result is never
	// zero, so the JE below is not taken on this path.
	WORD $0x8949; BYTE $0xf0     // mov    r8, rsi
	LONG $0xe0e08349             // and    r8, -32
	WORD $0x8949; BYTE $0xf2     // mov    r10, rsi
	LONG $0xe0e28349             // and    r10, -32
	JE   LBB0_12

	// Broadcast the fill byte: xmm0 = zero-extended dl, then pshufb with an
	// all-zero mask copies byte 0 into every lane.
	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
	LONG $0xc06e0f66             // movd    xmm0, eax
	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1

	// rcx = ((r10 - 32) / 32 + 1) & 7 = (number of 32-byte blocks) mod 8.
	// Only the low bits survive the mask, so the 32-bit shift/inc is safe.
	LONG $0xe04a8d4d             // lea    r9, [r10 - 32]
	WORD $0x8944; BYTE $0xc9     // mov    ecx, r9d
	WORD $0xe9c1; BYTE $0x05     // shr    ecx, 5
	WORD $0xc1ff                 // inc    ecx
	LONG $0x07e18348             // and    rcx, 7
	JE   LBB0_4

	// rcx = -(blocks mod 8); it is counted up to zero by the loop below.
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xc031                 // xor    eax, eax

// First vector loop: one 32-byte block (two unaligned 16-byte stores) per
// iteration, run (blocks mod 8) times. rax is the byte offset from buf.
LBB0_6:
	LONG $0x047f0ff3; BYTE $0x07   // movdqu    oword [rdi + rax], xmm0
	LONG $0x447f0ff3; WORD $0x1007 // movdqu    oword [rdi + rax + 16], xmm0
	LONG $0x20c08348               // add    rax, 32
	WORD $0xff48; BYTE $0xc1       // inc    rcx
	JNE  LBB0_6
	JMP  LBB0_7

// Block count is a multiple of 8: skip the first loop with offset rax = 0.
LBB0_4:
	WORD $0xc031 // xor    eax, eax

// r9 < 224 means r10 < 256, i.e. fewer than 8 blocks in total, all of which
// the first loop already wrote. Otherwise set up the 256-byte loop:
//   rcx = r10 - rax   vector bytes still to write (a multiple of 256)
//   rax = buf + rax + 240, so the 16 stores use displacements -240 .. 0
LBB0_7:
	LONG $0xe0f98149; WORD $0x0000; BYTE $0x00 // cmp    r9, 224
	JB   LBB0_10
	WORD $0x894c; BYTE $0xd1                   // mov    rcx, r10
	WORD $0x2948; BYTE $0xc1                   // sub    rcx, rax
	QUAD $0x000000f007848d48                   // lea    rax, [rdi + rax + 240]

// Second vector loop: sixteen unaligned 16-byte stores (256 bytes) per
// iteration; rcx counts the remaining bytes down to zero.
LBB0_9:
	QUAD $0xffffff10807f0ff3                   // movdqu    oword [rax - 240], xmm0
	QUAD $0xffffff20807f0ff3                   // movdqu    oword [rax - 224], xmm0
	QUAD $0xffffff30807f0ff3                   // movdqu    oword [rax - 208], xmm0
	QUAD $0xffffff40807f0ff3                   // movdqu    oword [rax - 192], xmm0
	QUAD $0xffffff50807f0ff3                   // movdqu    oword [rax - 176], xmm0
	QUAD $0xffffff60807f0ff3                   // movdqu    oword [rax - 160], xmm0
	QUAD $0xffffff70807f0ff3                   // movdqu    oword [rax - 144], xmm0
	LONG $0x407f0ff3; BYTE $0x80               // movdqu    oword [rax - 128], xmm0
	LONG $0x407f0ff3; BYTE $0x90               // movdqu    oword [rax - 112], xmm0
	LONG $0x407f0ff3; BYTE $0xa0               // movdqu    oword [rax - 96], xmm0
	LONG $0x407f0ff3; BYTE $0xb0               // movdqu    oword [rax - 80], xmm0
	LONG $0x407f0ff3; BYTE $0xc0               // movdqu    oword [rax - 64], xmm0
	LONG $0x407f0ff3; BYTE $0xd0               // movdqu    oword [rax - 48], xmm0
	LONG $0x407f0ff3; BYTE $0xe0               // movdqu    oword [rax - 32], xmm0
	LONG $0x407f0ff3; BYTE $0xf0               // movdqu    oword [rax - 16], xmm0
	LONG $0x007f0ff3                           // movdqu    oword [rax], xmm0
	LONG $0x01000548; WORD $0x0000             // add    rax, 256
	LONG $0x00c18148; WORD $0xffff; BYTE $0xff // add    rcx, -256
	JNE  LBB0_9

// Vector part finished. If len was a multiple of 32 everything is written;
// otherwise advance rdi past the vector-filled prefix for the byte loop.
LBB0_10:
	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
	JE   LBB0_13
	WORD $0x014c; BYTE $0xc7 // add    rdi, r8

// Byte loop: store dl one byte at a time until rdi reaches the end pointer
// in r11. Also the whole fill for 0 < len < 32 (entered with rdi = buf).
LBB0_12:
	WORD $0x1788             // mov    byte [rdi], dl
	WORD $0xff48; BYTE $0xc7 // inc    rdi
	WORD $0x3949; BYTE $0xfb // cmp    r11, rdi
	JNE  LBB0_12

LBB0_13:
	RET