File: valid_print_amd64.s

package info (click to toggle)
golang-github-segmentio-asm 1.2.0%2Bgit20231107.1cfacc8-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 932 kB
  • sloc: asm: 6,093; makefile: 32
file content (184 lines) | stat: -rw-r--r-- 3,327 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
// Code generated by command: go run valid_print_asm.go -pkg ascii -out ../ascii/valid_print_amd64.s -stubs ../ascii/valid_print_amd64.go. DO NOT EDIT.

//go:build !purego

#include "textflag.h"

// func ValidPrintString(s string) bool
// Requires: AVX, AVX2, SSE4.1
//
// ValidPrintString reports whether every byte of s lies in the inclusive
// range 0x20..0x7e. The empty string yields true.
//
// Frame: $0-17 (no locals; 16 bytes of string header + 1 result byte).
//
// Register roles:
//   AX      read cursor into s (reused as a data register in the 1-3 byte tail)
//   CX      bytes remaining
//   Y8/X8   every byte = 0x1f (AVX path: exclusive lower bound)
//   Y9/X9   every byte = 0x7e (AVX path: inclusive upper bound)
//
// Result convention: every path reaches an epilogue with ZF=1 when the bytes
// inspected so far are all valid and ZF=0 otherwise; SETEQ turns ZF into the
// returned bool.
//
// Dispatch: inputs of 16 bytes or more take the AVX path when bit 8 of
// cpu.X86 is set (NOTE(review): presumably the AVX2 feature bit - confirm
// against package cpu). Everything else takes the scalar path.
//
// NOTE(review): this file is generated; the avx_done epilogue and the JBE in
// cmp16 should be mirrored in the generator so they survive regeneration.
TEXT ·ValidPrintString(SB), NOSPLIT, $0-17
	MOVQ s_base+0(FP), AX
	MOVQ s_len+8(FP), CX
	CMPQ CX, $0x10
	JB   init_x86
	BTL  $0x08, github·com∕segmentio∕asm∕cpu·X86+0(SB)
	JCS  init_avx

	// Scalar (SWAR) path. For a word x, with per-byte constants:
	//   (x - 0x20..20) & ^x  has a byte's high bit set if that byte is < 0x20
	//   (x + 0x01..01) | x   has a byte's high bit set if that byte is >= 0x7f
	// A carry/borrow can only spill into a neighbouring byte when the word
	// already contains an invalid byte, so the combined test is exact.
init_x86:
	CMPQ CX, $0x08
	JB   cmp4
	MOVQ $0xdfdfdfdfdfdfdfe0, DX // -0x2020202020202020: subtract 0x20 per byte
	MOVQ $0x0101010101010101, BX // add 1 per byte
	MOVQ $0x8080808080808080, SI // per-byte high-bit mask

	// 8 bytes per iteration while CX >= 8.
cmp8:
	MOVQ  (AX), DI
	MOVQ  DI, R8
	LEAQ  (DI)(DX*1), R9 // x - 0x20 per byte
	NOTQ  R8
	ANDQ  R8, R9         // high bit set where byte < 0x20
	LEAQ  (DI)(BX*1), R8 // x + 1 per byte
	ORQ   R8, DI         // high bit set where byte >= 0x7f
	ORQ   R9, DI
	ADDQ  $0x08, AX
	SUBQ  $0x08, CX
	TESTQ SI, DI
	JNE   done           // ZF=0: invalid byte found
	CMPQ  CX, $0x08
	JB    cmp4
	JMP   cmp8

	// One 4-byte step using the same test with 32-bit constants.
cmp4:
	CMPQ  CX, $0x04
	JB    cmp3
	MOVL  (AX), DX
	MOVL  DX, BX
	LEAL  3755991008(DX), SI // 0xdfdfdfe0
	NOTL  BX
	ANDL  BX, SI
	LEAL  16843009(DX), BX   // 0x01010101
	ORL   BX, DX
	ORL   SI, DX
	ADDQ  $0x04, AX
	SUBQ  $0x04, CX
	TESTL $0x80808080, DX
	JNE   done

	// 1-3 remaining bytes are assembled into one 32-bit word whose unused
	// high bytes are filled with 0x20 (a valid byte), so the padding cannot
	// change the outcome of the check in final.
cmp3:
	CMPQ    CX, $0x03
	JB      cmp2
	MOVWLZX (AX), DX
	MOVBLZX 2(AX), AX
	SHLL    $0x10, AX
	ORL     DX, AX
	ORL     $0x20000000, AX
	JMP     final

cmp2:
	CMPQ    CX, $0x02
	JB      cmp1
	MOVWLZX (AX), AX
	ORL     $0x20200000, AX
	JMP     final

cmp1:
	CMPQ    CX, $0x00
	JE      done // nothing left: ZF=1 from the compare, result is true
	MOVBLZX (AX), AX
	ORL     $0x20202000, AX

final:
	MOVL  AX, CX
	LEAL  3755991008(AX), DX
	NOTL  CX
	ANDL  CX, DX
	LEAL  16843009(AX), CX
	ORL   CX, AX
	ORL   DX, AX
	TESTL $0x80808080, AX

	// Scalar epilogue. Must stay free of VEX instructions: this path also
	// runs when the AVX feature bit is not set.
done:
	SETEQ ret+16(FP)
	RET

	// AVX path (only entered with CX >= 16). VPCMPGTB is a signed compare,
	// so bytes >= 0x80 are negative and fail the "> 0x1f" test. For each
	// lane: valid = (b > 0x1f) AND NOT (b > 0x7e).
init_avx:
	MOVB         $0x1f, DL
	PINSRB       $0x00, DX, X8
	VPBROADCASTB X8, Y8
	MOVB         $0x7e, DL
	PINSRB       $0x00, DX, X9
	VPBROADCASTB X9, Y9

	// 128 bytes per iteration; the four validity masks are ANDed together
	// and must come out all-ones.
cmp128:
	CMPQ      CX, $0x80
	JB        cmp64
	VMOVDQU   (AX), Y0
	VMOVDQU   32(AX), Y1
	VMOVDQU   64(AX), Y2
	VMOVDQU   96(AX), Y3
	VPCMPGTB  Y8, Y0, Y4
	VPCMPGTB  Y9, Y0, Y0
	VPANDN    Y4, Y0, Y0
	VPCMPGTB  Y8, Y1, Y5
	VPCMPGTB  Y9, Y1, Y1
	VPANDN    Y5, Y1, Y1
	VPCMPGTB  Y8, Y2, Y6
	VPCMPGTB  Y9, Y2, Y2
	VPANDN    Y6, Y2, Y2
	VPCMPGTB  Y8, Y3, Y7
	VPCMPGTB  Y9, Y3, Y3
	VPANDN    Y7, Y3, Y3
	VPAND     Y1, Y0, Y0
	VPAND     Y3, Y2, Y2
	VPAND     Y2, Y0, Y0
	ADDQ      $0x80, AX
	SUBQ      $0x80, CX
	VPMOVMSKB Y0, DX
	XORL      $0xffffffff, DX // zero (ZF=1) only if all 32 mask bits were set
	JNE       avx_done
	JMP       cmp128

cmp64:
	CMPQ      CX, $0x40
	JB        cmp32
	VMOVDQU   (AX), Y0
	VMOVDQU   32(AX), Y1
	VPCMPGTB  Y8, Y0, Y2
	VPCMPGTB  Y9, Y0, Y0
	VPANDN    Y2, Y0, Y0
	VPCMPGTB  Y8, Y1, Y3
	VPCMPGTB  Y9, Y1, Y1
	VPANDN    Y3, Y1, Y1
	VPAND     Y1, Y0, Y0
	ADDQ      $0x40, AX
	SUBQ      $0x40, CX
	VPMOVMSKB Y0, DX
	XORL      $0xffffffff, DX
	JNE       avx_done

cmp32:
	CMPQ      CX, $0x20
	JB        cmp16
	VMOVDQU   (AX), Y0
	VPCMPGTB  Y8, Y0, Y1
	VPCMPGTB  Y9, Y0, Y0
	VPANDN    Y1, Y0, Y0
	ADDQ      $0x20, AX
	SUBQ      $0x20, CX
	VPMOVMSKB Y0, DX
	XORL      $0xffffffff, DX
	JNE       avx_done

	// CX is in [0, 31] here. Unsigned branch, consistent with the other
	// length comparisons in this function.
cmp16:
	CMPQ      CX, $0x10
	JBE       cmp_tail
	VMOVDQU   (AX), X0
	VPCMPGTB  X8, X0, X1
	VPCMPGTB  X9, X0, X0
	VPANDN    X1, X0, X0
	ADDQ      $0x10, AX
	SUBQ      $0x10, CX
	VPMOVMSKB X0, DX
	XORL      $0x0000ffff, DX
	JNE       avx_done

	// CX <= 16 here, so CX-16 <= 0 and AX steps back to (end of s - 16).
	// The final 16 bytes are checked with one load that may overlap bytes
	// already validated. In bounds because this path requires len(s) >= 16.
cmp_tail:
	SUBQ      $0x10, CX
	ADDQ      CX, AX
	VMOVDQU   (AX), X0
	VPCMPGTB  X8, X0, X1
	VPCMPGTB  X9, X0, X0
	VPANDN    X1, X0, X0
	VPMOVMSKB X0, DX
	XORL      $0x0000ffff, DX

	// AVX epilogue. Clear the upper halves of the YMM registers before
	// returning so later legacy-SSE code does not run with dirty upper
	// state. VZEROUPPER does not modify EFLAGS, so ZF from the preceding
	// XORL is still the result.
avx_done:
	VZEROUPPER
	SETEQ ret+16(FP)
	RET