File: memory_avx2_amd64.s

package info (click to toggle)
golang-github-apache-arrow-go 18.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 32,200 kB
  • sloc: asm: 477,547; ansic: 5,369; cpp: 759; sh: 585; makefile: 319; python: 190; sed: 5
file content (85 lines) | stat: -rw-r--r-- 3,752 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// _memset_avx2 stores the low byte of c into each of the len bytes starting
// at buf.
//
// Frame $0-24: no local stack, 24 bytes of arguments read at buf+0(FP),
// len+8(FP) and c+16(FP); nothing is written back to the frame.
// NOTE(review): the matching Go declaration is not in this file -- confirm
// that its argument names and sizes agree with the offsets above.
//
// The body is c2goasm output: raw x86-64 encodings, with the Intel-syntax
// disassembly in the trailing comment of each line. It uses AVX2
// (vpbroadcastb ymm); presumably the caller checks CPU support -- verify.
//
// Register roles:
//   DI  (rdi)  write cursor, starts at buf
//   SI  (rsi)  len
//   DX  (rdx)  c; only dl is ever stored
//   r11        buf + len, one past the last byte to write
//   r8, r10    len & -128, the byte count handled by the vector loops
//   r9         r10 - 128
//   rax        loop counter (negated chunk count in LBB0_6, bytes left in LBB0_9)
//   rcx        byte offset in LBB0_6, then a pointer biased by +480 in LBB0_9
//   ymm0       the fill byte replicated into all 32 byte lanes
TEXT ·_memset_avx2(SB), $0-24

	MOVQ buf+0(FP), DI
	MOVQ len+8(FP), SI
	MOVQ c+16(FP), DX

	// r11 = end pointer. An unsigned end <= buf means len == 0 or the
	// address computation wrapped; both cases return without writing.
	LONG $0x371c8d4c                           // lea    r11, [rdi + rsi]
	WORD $0x3949; BYTE $0xfb                   // cmp    r11, rdi
	JBE  LBB0_13

	// Fewer than 128 bytes: skip the vector path and use the byte loop.
	LONG $0x80fe8148; WORD $0x0000; BYTE $0x00 // cmp    rsi, 128
	JB   LBB0_12

	// r8 = r10 = len rounded down to a multiple of 128. The JE below cannot
	// be taken on this path because rsi >= 128 was just established.
	WORD $0x8949; BYTE $0xf0                   // mov    r8, rsi
	LONG $0x80e08349                           // and    r8, -128
	WORD $0x8949; BYTE $0xf2                   // mov    r10, rsi
	LONG $0x80e28349                           // and    r10, -128
	JE   LBB0_12

	// Replicate the fill byte across ymm0.
	LONG $0xc26ef9c5                           // vmovd    xmm0, edx
	LONG $0x787de2c4; BYTE $0xc0               // vpbroadcastb    ymm0, xmm0

	// rax = (number of 128-byte chunks) mod 4: the chunks that do not fit
	// the 4-chunk (512-byte) main loop. Zero means no peel loop is needed.
	LONG $0x804a8d4d                           // lea    r9, [r10 - 128]
	WORD $0x8944; BYTE $0xc8                   // mov    eax, r9d
	WORD $0xe8c1; BYTE $0x07                   // shr    eax, 7
	WORD $0xc0ff                               // inc    eax
	LONG $0x03e08348                           // and    rax, 3
	JE   LBB0_4

	// Negate the count so the loop can count up to zero; rcx = offset 0.
	WORD $0xf748; BYTE $0xd8                   // neg    rax
	WORD $0xc931                               // xor    ecx, ecx

	// Peel loop: one 128-byte chunk (4 x 32-byte stores) per iteration.
LBB0_6:
	LONG $0x047ffec5; BYTE $0x0f   // vmovdqu    yword [rdi + rcx], ymm0
	LONG $0x447ffec5; WORD $0x200f // vmovdqu    yword [rdi + rcx + 32], ymm0
	LONG $0x447ffec5; WORD $0x400f // vmovdqu    yword [rdi + rcx + 64], ymm0
	LONG $0x447ffec5; WORD $0x600f // vmovdqu    yword [rdi + rcx + 96], ymm0
	LONG $0x80e98348               // sub    rcx, -128
	WORD $0xff48; BYTE $0xc0       // inc    rax
	JNE  LBB0_6
	JMP  LBB0_7

	// No peel iterations were needed: start the main loop at offset 0.
LBB0_4:
	WORD $0xc931 // xor    ecx, ecx

	// r9 < 384 means r10 < 512, so the peel loop already covered every
	// 128-byte chunk. Otherwise rax = vector bytes still to write (a
	// multiple of 512) and rcx becomes a pointer 480 bytes past the next
	// byte to write.
LBB0_7:
	LONG $0x80f98149; WORD $0x0001; BYTE $0x00 // cmp    r9, 384
	JB   LBB0_10
	WORD $0x894c; BYTE $0xd0                   // mov    rax, r10
	WORD $0x2948; BYTE $0xc8                   // sub    rax, rcx
	QUAD $0x000001e00f8c8d48                   // lea    rcx, [rdi + rcx + 480]

	// Main loop: 16 x 32-byte stores = 512 bytes per iteration.
LBB0_9:
	QUAD $0xfffffe20817ffec5                   // vmovdqu    yword [rcx - 480], ymm0
	QUAD $0xfffffe40817ffec5                   // vmovdqu    yword [rcx - 448], ymm0
	QUAD $0xfffffe60817ffec5                   // vmovdqu    yword [rcx - 416], ymm0
	QUAD $0xfffffe80817ffec5                   // vmovdqu    yword [rcx - 384], ymm0
	QUAD $0xfffffea0817ffec5                   // vmovdqu    yword [rcx - 352], ymm0
	QUAD $0xfffffec0817ffec5                   // vmovdqu    yword [rcx - 320], ymm0
	QUAD $0xfffffee0817ffec5                   // vmovdqu    yword [rcx - 288], ymm0
	QUAD $0xffffff00817ffec5                   // vmovdqu    yword [rcx - 256], ymm0
	QUAD $0xffffff20817ffec5                   // vmovdqu    yword [rcx - 224], ymm0
	QUAD $0xffffff40817ffec5                   // vmovdqu    yword [rcx - 192], ymm0
	QUAD $0xffffff60817ffec5                   // vmovdqu    yword [rcx - 160], ymm0
	LONG $0x417ffec5; BYTE $0x80               // vmovdqu    yword [rcx - 128], ymm0
	LONG $0x417ffec5; BYTE $0xa0               // vmovdqu    yword [rcx - 96], ymm0
	LONG $0x417ffec5; BYTE $0xc0               // vmovdqu    yword [rcx - 64], ymm0
	LONG $0x417ffec5; BYTE $0xe0               // vmovdqu    yword [rcx - 32], ymm0
	LONG $0x017ffec5                           // vmovdqu    yword [rcx], ymm0
	LONG $0x00c18148; WORD $0x0002; BYTE $0x00 // add    rcx, 512
	LONG $0xfe000548; WORD $0xffff             // add    rax, -512
	JNE  LBB0_9

	// Vector part done. If len was a multiple of 128 nothing is left;
	// otherwise advance the cursor past the bytes already written.
LBB0_10:
	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
	JE   LBB0_13
	WORD $0x014c; BYTE $0xc7 // add    rdi, r8

	// Byte loop: store dl and advance until the cursor reaches r11. It is
	// entered only with rdi < r11, so the first unconditional store is in
	// range.
LBB0_12:
	WORD $0x1788             // mov    byte [rdi], dl
	WORD $0xff48; BYTE $0xc7 // inc    rdi
	WORD $0x3949; BYTE $0xfb // cmp    r11, rdi
	JNE  LBB0_12

	// Common exit. VZEROUPPER runs on every path, including those that
	// never touched ymm0.
LBB0_13:
	VZEROUPPER
	RET