File: union16_amd64.s

package info (click to toggle)
golang-github-segmentio-asm 1.2.0%2Bgit20231107.1cfacc8-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 932 kB
  • sloc: asm: 6,093; makefile: 32
file content (74 lines) | stat: -rw-r--r-- 1,421 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// Code generated by command: go run union16_asm.go -pkg sortedset -out ../sortedset/union16_amd64.s -stubs ../sortedset/union16_amd64.go. DO NOT EDIT.

//go:build !purego

#include "textflag.h"

// func union16(dst []byte, a []byte, b []byte) (i int, j int, k int)
// Requires: AVX
//
// union16 performs a two-way merge of 16-byte items read from a and b,
// writing the merged items to dst. On every step the current item of a is
// compared with the current item of b as unsigned bytes, starting at the
// lowest address (the first differing byte decides the order):
//   - a item <  b item: the a item is written and a advances;
//   - a item >  b item: the b item is written and b advances;
//   - a item == b item: one copy is written and both inputs advance.
// The routine returns as soon as either input reaches its end; any items
// left in the other input are NOT copied here.
//
// Results are byte offsets (not item counts):
//   i = bytes consumed from a
//   j = bytes consumed from b
//   k = bytes written to dst
//
// NOTE(review): the first item of each input is loaded unconditionally and
// the end-of-input checks use equality only, so this presumably expects
// len(a) and len(b) to be non-zero multiples of 16 — confirm against the Go
// caller. No capacity check is made on dst in this routine either.
// NOTE(review): inputs are presumably sorted, duplicate-free sequences (the
// generator targets package sortedset); nothing here verifies that.
//
// Register roles inside the loop:
//   AX = dst write cursor
//   CX = a read cursor,  BX = end of a (a_base + a_len)
//   DX = b read cursor,  SI = end of b (b_base + b_len)
//   X0 = all-ones constant, X1 = current a item, X2 = current b item
//   X3, X4, DI, R8, R9 = scratch for the comparison
TEXT ·union16(SB), NOSPLIT, $0-96
	// Load the three base pointers and turn the two input lengths into
	// one-past-the-end pointers.
	MOVQ     dst_base+0(FP), AX
	MOVQ     a_base+24(FP), CX
	MOVQ     b_base+48(FP), DX
	MOVQ     a_len+32(FP), BX
	ADDQ     CX, BX
	MOVQ     b_len+56(FP), SI
	ADDQ     DX, SI
	// X0 = all ones (a register always equals itself); used below as the
	// XOR operand that inverts a compare mask.
	VPCMPEQB X0, X0, X0
	// Prime the loop with the first item of each input (no length check).
	VMOVUPS  (CX), X1
	VMOVUPS  (DX), X2

loop:
	// X3 = 0xFF in every byte lane where the a item and b item differ
	// (byte equality mask, inverted via XOR with all-ones).
	VPCMPEQB  X1, X2, X3
	VPXOR     X3, X0, X3
	// X4 = 0xFF in every byte lane where a's byte < b's byte (unsigned):
	// min(a, b) == a gives a <= b, and AND-ing with "differs" removes the
	// equal lanes.
	VPMINUB   X1, X2, X4
	VPCMPEQB  X1, X4, X4
	VPAND     X4, X3, X4
	// Collapse both vector masks to one bit per byte lane:
	// DI = lanes that differ, R8 = lanes where a < b.
	VPMOVMSKB X3, DI
	VPMOVMSKB X4, R8
	// No differing lane: the two items are identical.
	TESTL     DI, DI
	JZ        equal
	// R9 = index of the first (lowest-address) differing byte. BTSL copies
	// that bit of R8 into CF, so CF=1 means a < b at the deciding byte.
	// (BTSL also sets the bit in R8; R8 is not read again afterwards.)
	BSFL      DI, R9
	BTSL      R9, R8
	JCS       less
	// Fall through: b item < a item. Emit the b item and advance b.
	VMOVUPS   X2, (AX)
	ADDQ      $0x10, AX
	ADDQ      $0x10, DX
	// Stop when b is exhausted; otherwise fetch the next b item.
	CMPQ      DX, SI
	JE        done
	VMOVUPS   (DX), X2
	JMP       loop

less:
	// a item < b item. Emit the a item and advance a.
	VMOVUPS X1, (AX)
	ADDQ    $0x10, AX
	ADDQ    $0x10, CX
	// Stop when a is exhausted; otherwise fetch the next a item.
	CMPQ    CX, BX
	JE      done
	VMOVUPS (CX), X1
	JMP     loop

equal:
	// Items are identical: emit a single copy and advance both inputs.
	VMOVUPS X1, (AX)
	ADDQ    $0x10, AX
	ADDQ    $0x10, CX
	ADDQ    $0x10, DX
	// Stop if either input is exhausted; only reload once both still have
	// an item available.
	CMPQ    CX, BX
	JE      done
	CMPQ    DX, SI
	JE      done
	VMOVUPS (CX), X1
	VMOVUPS (DX), X2
	JMP     loop

done:
	// Convert the cursors back into byte offsets by subtracting the
	// original base pointers (reloaded from the argument frame; BX and CX
	// are reused as scratch here).
	MOVQ a_base+24(FP), BX
	SUBQ BX, CX
	MOVQ CX, i+72(FP)
	MOVQ b_base+48(FP), CX
	SUBQ CX, DX
	MOVQ DX, j+80(FP)
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, k+88(FP)
	RET