File: bitmap_bmi2_amd64.s

package info (click to toggle)
golang-github-apache-arrow-go 18.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 32,200 kB
  • sloc: asm: 477,547; ansic: 5,369; cpp: 759; sh: 585; makefile: 319; python: 190; sed: 5
file content (117 lines) | stat: -rw-r--r-- 5,115 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// ·_extract_bits_bmi2 performs one BMI2 parallel bit extract: the bits of
// `bitmap` at the positions where `selectBitmap` has a 1 are packed, in order,
// into the low-order bits of the result; the remaining high bits are zero.
//
// Argument frame (24 bytes, no locals):
//   bitmap+0(FP)        8 bytes  source bits
//   selectBitmap+8(FP)  8 bytes  selection mask
//   res+16(FP)          8 bytes  extracted bits (return value)
//
// Clobbers: AX, SI, DI.
// NOTE(review): the Go-side declaration is not in this file; the caller is
// presumably responsible for checking that the CPU supports BMI2.
TEXT ·_extract_bits_bmi2(SB), $0-24
	MOVQ  selectBitmap+8(FP), SI // SI = mask
	MOVQ  bitmap+0(FP), DI       // DI = source
	PEXTQ SI, DI, AX             // pext rax, rdi, rsi
	MOVQ  AX, res+16(FP)
	RET

// LCDATA1 is the constant pool read by ·_levels_to_bitmap_bmi2, which loads its
// address into BP and then uses the fixed byte offsets below. The offsets are
// baked into the encoded instructions there, so the layout must not change.
//
//   +0x00..+0x18  four qwords {0,1,2,3}: loaded as one 32-byte vector (ymm2),
//                 the per-lane shift counts for the first four levels
//   +0x20         4   broadcast to ymm12 (shift offset for levels 4..7)
//   +0x28         8   broadcast to ymm4  (shift offset for levels 8..11)
//   +0x30         12  broadcast to ymm5  (shift offset for levels 12..15)
//   +0x38         1   broadcast to ymm6  (mask reducing each compare result to one bit)
//   +0x40         16  broadcast to ymm7  (per-iteration increment of the shift counts)
DATA LCDATA1<>+0x000(SB)/8, $0x0000000000000000
DATA LCDATA1<>+0x008(SB)/8, $0x0000000000000001
DATA LCDATA1<>+0x010(SB)/8, $0x0000000000000002
DATA LCDATA1<>+0x018(SB)/8, $0x0000000000000003
DATA LCDATA1<>+0x020(SB)/8, $0x0000000000000004
DATA LCDATA1<>+0x028(SB)/8, $0x0000000000000008
DATA LCDATA1<>+0x030(SB)/8, $0x000000000000000c
DATA LCDATA1<>+0x038(SB)/8, $0x0000000000000001
DATA LCDATA1<>+0x040(SB)/8, $0x0000000000000010
// 9 qwords = 72 bytes. Flag value 8 corresponds to RODATA in Go's textflag.h.
GLOBL LCDATA1<>(SB), 8, $72

// ·_levels_to_bitmap_bmi2 builds a 64-bit bitmap from an array of 16-bit
// levels: bit i of the result is set when levels[i] > rhs (signed 16-bit
// comparison), for i in [0, numLevels).
//
// Argument frame (32 bytes):
//   levels+0(FP)     8 bytes  pointer to the first 16-bit level
//   numLevels+8(FP)  8 bytes  element count; only the low 32 bits are examined,
//                             as a signed value (<= 0 returns 0)
//   rhs+16(FP)       2 bytes  threshold to compare against
//   res+24(FP)       8 bytes  resulting bitmap
//
// Local frame: declared as 8 bytes (otherwise unused) rather than 0. The body
// uses BP as the base register for LCDATA1, and BP is callee-save on amd64. The
// Go assembler only inserts the BP save/restore around the body when the frame
// size is non-zero; with a zero frame the caller's frame pointer would be left
// pointing at LCDATA1 after RET, which breaks frame-pointer-based unwinding.
//
// Register roles:
//   DI  levels pointer          R8  numLevels (zero-extended from 32 bits)
//   DX  rhs (low 16 bits)       SI  numLevels rounded down to a multiple of 16,
//   BP  &LCDATA1                    then the scalar tail index
//   AX  vector loop index, then the accumulated result
//   CX  scalar scratch
//   ymm1  rhs broadcast to every word    ymm2  lane shift counts {0,1,2,3}+16*iter
//   ymm12/ymm4/ymm5  constants 4/8/12    ymm6  constant 1    ymm7  constant 16
//   ymm0, ymm8, ymm9, ymm10  partial bitmap accumulators
//
// NOTE(review): the vector path shifts with VPSLLVQ (counts > 63 give 0) while
// the scalar tail uses SHLX (count taken modulo 64), so callers presumably
// never pass more than 64 levels — confirm against the Go caller.
// NOTE(review): VMOVDQA requires LCDATA1 to be 32-byte aligned; this relies on
// the linker's placement of the symbol — confirm.
TEXT ·_levels_to_bitmap_bmi2(SB), $8-32

	MOVQ levels+0(FP), DI
	MOVQ numLevels+8(FP), SI
	MOVW rhs+16(FP), DX             // only the low word of DX is consumed below
	LEAQ LCDATA1<>(SB), BP          // BP is saved/restored by the assembler (non-zero frame)

	// numLevels <= 0: nothing to do, return 0.
	WORD $0xf685             // test    esi, esi
	JLE  LBB1_1
	WORD $0x8941; BYTE $0xf0 // mov    r8d, esi
	// 16 or more levels: take the vector path; otherwise go straight to the
	// scalar loop starting at index 0 with an empty result.
	WORD $0xfe83; BYTE $0x0f // cmp    esi, 15
	JA   LBB1_4
	WORD $0xf631             // xor    esi, esi
	WORD $0xc031             // xor    eax, eax
	JMP  LBB1_7

LBB1_1:
	WORD $0xc031 // xor    eax, eax
	JMP  LBB1_8

	// Vector setup: SI = numLevels & ~15 (number of levels handled 16 at a time),
	// broadcast rhs, load the constants from LCDATA1, zero the accumulators.
LBB1_4:
	WORD $0x8944; BYTE $0xc6       // mov    esi, r8d
	WORD $0xe683; BYTE $0xf0       // and    esi, -16
	LONG $0xc26ef9c5               // vmovd    xmm0, edx
	LONG $0x7979e2c4; BYTE $0xc8   // vpbroadcastw    xmm1, xmm0
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x556ffdc5; BYTE $0x00   // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI1_0] */
	LONG $0x597d62c4; WORD $0x2065 // vpbroadcastq    ymm12, qword 32[rbp] /* [rip + .LCPI1_1] */
	LONG $0x597de2c4; WORD $0x2865 // vpbroadcastq    ymm4, qword 40[rbp] /* [rip + .LCPI1_2] */
	LONG $0x597de2c4; WORD $0x306d // vpbroadcastq    ymm5, qword 48[rbp] /* [rip + .LCPI1_3] */
	LONG $0x597de2c4; WORD $0x3875 // vpbroadcastq    ymm6, qword 56[rbp] /* [rip + .LCPI1_4] */
	LONG $0x597de2c4; WORD $0x407d // vpbroadcastq    ymm7, qword 64[rbp] /* [rip + .LCPI1_5] */
	WORD $0xc031                   // xor    eax, eax
	LONG $0xef3941c4; BYTE $0xc0   // vpxor    xmm8, xmm8, xmm8
	LONG $0xef3141c4; BYTE $0xc9   // vpxor    xmm9, xmm9, xmm9
	LONG $0xef2941c4; BYTE $0xd2   // vpxor    xmm10, xmm10, xmm10

	// Vector loop: each iteration handles 16 levels as four groups of four.
	// Per group: load 4 words, compare > rhs, widen to qwords, mask to 0/1,
	// shift each lane to its bit position, and OR into that group's accumulator.
LBB1_5:
	LONG $0xdad41dc5               // vpaddq    ymm11, ymm12, ymm2
	LONG $0x5c7efac5; WORD $0x0847 // vmovq    xmm3, qword [rdi + 2*rax + 8]
	LONG $0xd965e1c5               // vpcmpgtw    xmm3, xmm3, xmm1
	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq    ymm3, xmm3
	LONG $0xdedbe5c5               // vpand    ymm3, ymm3, ymm6
	LONG $0x47e5c2c4; BYTE $0xdb   // vpsllvq    ymm3, ymm3, ymm11
	LONG $0xdcd46dc5               // vpaddq    ymm11, ymm2, ymm4
	LONG $0xc3eb3dc5               // vpor    ymm8, ymm8, ymm3
	LONG $0x5c7efac5; WORD $0x1047 // vmovq    xmm3, qword [rdi + 2*rax + 16]
	LONG $0xd965e1c5               // vpcmpgtw    xmm3, xmm3, xmm1
	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq    ymm3, xmm3
	LONG $0xdedbe5c5               // vpand    ymm3, ymm3, ymm6
	LONG $0x47e5c2c4; BYTE $0xdb   // vpsllvq    ymm3, ymm3, ymm11
	LONG $0xddd46dc5               // vpaddq    ymm11, ymm2, ymm5
	LONG $0xcbeb35c5               // vpor    ymm9, ymm9, ymm3
	LONG $0x5c7efac5; WORD $0x1847 // vmovq    xmm3, qword [rdi + 2*rax + 24]
	LONG $0xd965e1c5               // vpcmpgtw    xmm3, xmm3, xmm1
	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq    ymm3, xmm3
	LONG $0xdedbe5c5               // vpand    ymm3, ymm3, ymm6
	LONG $0x47e5c2c4; BYTE $0xdb   // vpsllvq    ymm3, ymm3, ymm11
	LONG $0xd3eb2dc5               // vpor    ymm10, ymm10, ymm3
	LONG $0x1c7efac5; BYTE $0x47   // vmovq    xmm3, qword [rdi + 2*rax]
	LONG $0xd965e1c5               // vpcmpgtw    xmm3, xmm3, xmm1
	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq    ymm3, xmm3
	LONG $0xdedbe5c5               // vpand    ymm3, ymm3, ymm6
	LONG $0x47e5e2c4; BYTE $0xda   // vpsllvq    ymm3, ymm3, ymm2
	LONG $0xc0ebe5c5               // vpor    ymm0, ymm3, ymm0
	LONG $0x10c08348               // add    rax, 16
	LONG $0xd7d4edc5               // vpaddq    ymm2, ymm2, ymm7
	WORD $0x3948; BYTE $0xc6       // cmp    rsi, rax
	JNE  LBB1_5
	// Fold the four accumulators, then the four qword lanes, into one qword in RAX.
	LONG $0xc0ebbdc5               // vpor    ymm0, ymm8, ymm0
	LONG $0xc0ebb5c5               // vpor    ymm0, ymm9, ymm0
	LONG $0xc0ebadc5               // vpor    ymm0, ymm10, ymm0
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
	LONG $0xc1ebf9c5               // vpor    xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0x4e   // vpshufd    xmm1, xmm0, 78
	LONG $0xc1ebf9c5               // vpor    xmm0, xmm0, xmm1
	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
	// Done if numLevels was an exact multiple of 16; otherwise finish the tail.
	WORD $0x394c; BYTE $0xc6       // cmp    rsi, r8
	JE   LBB1_8

	// Scalar loop: one level per iteration, index in RSI, result accumulated in RAX.
LBB1_7:
	WORD $0xc931                 // xor    ecx, ecx
	LONG $0x77143966             // cmp    word [rdi + 2*rsi], dx
	WORD $0x9f0f; BYTE $0xd1     // setg    cl
	LONG $0xf7c9e2c4; BYTE $0xc9 // shlx    rcx, rcx, rsi
	WORD $0x0948; BYTE $0xc8     // or    rax, rcx
	LONG $0x01c68348             // add    rsi, 1
	WORD $0x3949; BYTE $0xf0     // cmp    r8, rsi
	JNE  LBB1_7

	// Common exit: clear the upper halves of the YMM registers before returning.
LBB1_8:
	VZEROUPPER
	MOVQ AX, res+24(FP)
	RET