File: strchr-mte.S

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (131 lines) | stat: -rw-r--r-- 3,905 bytes parent folder | download | duplicates (23)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
 * strchr - find a character in a string
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "../asmdefs.h"

/* Arguments and results.  */
/* srcin is used as the address of the string to search.
   chrin is the character to look for; it is replicated into byte lanes
   with dup ... .16b, so only its low 8 bits take part in the search.  */
#define srcin		x0
#define chrin		w1

/* result is the same register as srcin (x0), so srcin is no longer
   available once result has been written.  */
#define result		x0

/* General-purpose scratch registers.
   src   - read cursor: srcin rounded down to a multiple of 16, then
           advanced by the post-indexed loads.
   tmp1  - srcin's offset within its 16-byte chunk on entry; later the
           64-bit syndrome and then the bit index derived from it.
   wtmp2 - holds the magic constant 0x10011001.
   tmp3  - all-ones value used to build the padding mask, then the raw
           syndrome of the first (unaligned) chunk.  */
#define src		x2
#define	tmp1		x3
#define wtmp2		w4
#define tmp3		x5

/* Vector registers.
   vrepchr     - chrin replicated into all 16 byte lanes.
   qdata/vdata - two views (q1/v1) of the same register: the 16-byte
                 chunk most recently loaded.
   vhas_nul    - per-byte result of comparing the chunk with 0.
   vhas_chr    - per-byte result of comparing the chunk with vrepchr.
   vrepmask_c  - 0x10011001 in every 32-bit lane; selects the syndrome
                 bit that means "matched the requested character".
   vrepmask_0  - vrepmask_c added to itself (0x20022002); selects the
                 syndrome bit that means "matched NUL".
   vend        - combined syndrome before it is moved to a general
                 register.  */
#define vrepchr		v0
#define qdata		q1
#define vdata		v1
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask_0	v4
#define vrepmask_c	v5
#define vend		v6

/* L(name) pastes its argument onto ".L", giving the label .Lname.  */
#define L(l) .L ## l

/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value, with
   four bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  Within each four-bit tuple, bit 0 is
   set if the relevant byte matched the requested character; bit 1 is
   set if the relevant byte matched the NUL end of string (we trigger
   off bit 0 for the special case of looking for NUL); bits 2 and 3
   are not used.
   Since the bits in the syndrome reflect exactly the order in which
   things occur in the original string, a count_trailing_zeros()
   operation will identify exactly which byte is causing the
   termination, and why.  */

/* Locals and temporaries. */

/* __strchr_aarch64_mte
 *
 * Scan the string at srcin for the byte held in the low 8 bits of chrin.
 * Returns in result (x0) the address of the first matching byte, or 0 if
 * a NUL byte is reached first.  If the requested byte is itself NUL, the
 * address of the terminating NUL is returned.
 *
 * Every load is a 16-byte load from a 16-byte-aligned address: src is
 * srcin rounded down to a multiple of 16 and is only ever advanced by
 * 16 or 32.
 * NOTE(review): presumably this is what makes the routine usable with
 * MTE, as the name suggests -- confirm against the project documentation.
 *
 * The code between ENTRY and END uses only the registers named by the
 * #defines earlier in this file (x0-x5, v0-v6), changes the condition
 * flags, and never references the stack pointer.  ENTRY and END are
 * macros from asmdefs.h, which is not visible here.
 */
ENTRY(__strchr_aarch64_mte)
	/* Magic constant 0x10011001 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x20022002 used
	   similarly for NUL termination. */
	mov	wtmp2, #0x1001
	movk	wtmp2, #0x1001, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #15		/* Work with aligned 16-byte chunks. */
	dup	vrepmask_c.4s, wtmp2
	/* tmp1 = offset of srcin inside its chunk; the flags from this
	   instruction feed the b.eq below (the vector add in between does
	   not alter them).  */
	ands	tmp1, srcin, #15
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	/* Offset 0: srcin is already aligned, go straight to the main loop.  */
	b.eq	L(loop)

	/* Input string is not 16-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	/* Post-indexed load: src points at the following chunk afterwards.  */
	ldr	qdata, [src], #16
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
	and	vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
	/* Four syndrome bits per byte: byte offset * 4 = bit offset.  */
	lsl	tmp1, tmp1, #2
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	mov	tmp3, #~0
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	/* tmp1 = all-ones shifted left by the bit offset, i.e. a mask with
	   the syndrome bits of the bytes before srcin cleared.  */
	lsl	tmp1, tmp3, tmp1

	mov	tmp3, vend.d[0]
	ands	tmp1, tmp3, tmp1	/* Mask padding bits. */
	/* Non-zero: a match or NUL lies in the first chunk at or after srcin.
	   Here src = address of that chunk + 16, as L(tail) expects.  */
	b.ne	L(tail)

	/* Main loop: two 16-byte chunks per iteration.  The first load
	   post-increments src by 32, so the second chunk is at src - 16.  */
L(loop):
	ldr	qdata, [src], #32
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Use a fast check for the termination condition.  */
	/* The compare results are used unmasked here: tmp1 is non-zero
	   exactly when some byte matched the character or NUL.  The precise
	   syndrome is only built at L(end).  */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	tmp1, vend.d[0]
	/* Hit in the first chunk: src = address of that chunk + 32.  */
	cbnz	tmp1, L(end)

	ldr	qdata, [src, #-16]
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Use a fast check for the termination condition.  */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	tmp1, vend.d[0]
	cbz	tmp1, L(loop)

	/* Adjust src for next two subtractions. */
	/* The hit is in the second chunk, at src - 16.  L(end) and L(tail)
	   together subtract 32, so add 16 to land on that chunk.  */
	add	src, src, #16
L(end):
	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  */
	/* vhas_nul and vhas_chr still hold the compare results for the chunk
	   that triggered the exit; reduce them to the 4-bit-per-byte
	   syndrome with the two masks.  */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
	and	vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
	sub	src, src, #16
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */

	mov	tmp1, vend.d[0]
	/* On entry to L(tail): tmp1 = non-zero syndrome of the chunk holding
	   the hit, src = address of that chunk + 16.  */
L(tail):
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source.  */
	sub	src, src, #16
	clz	tmp1, tmp1	/* And counting the leading zeros.  */
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	/* Bit index / 4 = byte index within the chunk.  This add does not
	   set flags, so the tst result is still valid for the csel.  */
	add	result, src, tmp1, lsr #2
	/* Keep the address for a character hit, otherwise return 0.  */
	csel	result, result, xzr, eq
	ret

END(__strchr_aarch64_mte)