File: xor_arm.s

package info (click to toggle)
golang-github-pion-transport 2.0.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bookworm-backports, forky, sid, trixie
  • size: 652 kB
  • sloc: asm: 259; makefile: 4
file content (114 lines) | stat: -rw-r--r-- 1,957 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
// Copyright 2022 The Pion Authors. All rights reserved.
// Use of this source code is governed by an MIT
// license that can be found in the LICENSE file.

#include "textflag.h"

// func xorBytesARM32(dst, a, b *byte, n int)
//
// Stores a[i] XOR b[i] into dst[i] for every i in [0, n), using only core
// ARM integer instructions. The call is a no-op when n <= 0.
//
// Register use:
//   R0 = dst cursor, R1 = a cursor, R2 = b cursor
//   R3 = number of bytes still to process
//   R4, R5 = scratch (loaded operands; R5 also holds the XOR result)
//
// NOTE(review): the word and halfword accesses below are issued at whatever
// alignment the caller's pointers have; confirm every supported target
// tolerates unaligned MOVW/MOVH, or that callers only pass aligned pointers.
TEXT ·xorBytesARM32(SB), NOSPLIT|NOFRAME, $0
	MOVW	dst+0(FP), R0
	MOVW	a+4(FP), R1
	MOVW	b+8(FP), R2
	MOVW	n+12(FP), R3
	// Signed compare: fewer than 4 bytes (including n <= 0) skips the word loop.
	CMP	$4, R3
	BLT	less_than4

	// Main loop: one 32-bit word per iteration. The .P suffix post-increments
	// the address register, so all three cursors advance by 4 each pass.
loop_4:
	MOVW.P	4(R1), R4
	MOVW.P	4(R2), R5
	EOR	R4, R5, R5
	MOVW.P	R5, 4(R0)

	SUB	$4, R3
	CMP	$4, R3
	BGE	loop_4

	// Here R3 < 4: handle an optional 2-byte piece.
less_than4:
	CMP	$2, R3
	BLT	less_than2
	MOVH.P	2(R1), R4
	MOVH.P	2(R2), R5
	EOR	R4, R5, R5
	MOVH.P	R5, 2(R0)	// only the low halfword of R5 is written

	SUB	$2, R3

	// Here R3 < 2: at most one byte is left.
less_than2:
	CMP	$0, R3
	// BLE rather than BEQ: a negative n reaches this point untouched by the
	// signed checks above and must not fall into the single-byte path.
	BLE	end
	MOVB	(R1), R4
	MOVB	(R2), R5
	EOR	R4, R5, R5
	MOVB	R5, (R0)
end:
	RET

// func xorBytesNEON32(dst, a, b *byte, n int)
//
// Stores a[i] XOR b[i] into dst[i] for every i in [0, n). Bulk data is
// processed 32 bytes at a time with NEON; the remainder is peeled off in
// 16-, 8-, 4-, 2- and 1-byte steps. The call is a no-op when n <= 0.
//
// Register use:
//   R0 = dst cursor, R1 = a cursor, R2 = b cursor
//   R3 = number of bytes still to process
//   R4, R5 = scratch for the scalar tail
//   q0-q3 (d0-d7) = NEON scratch
//
// The NEON instructions are emitted as raw WORD encodings; the comment on
// each WORD gives the instruction it encodes. Every NEON load/store uses the
// post-increment ("!") form, so the cursors advance by the bytes transferred.
//
// NOTE(review): this routine requires NEON support; presumably the caller
// checks for it before dispatching here - confirm against the Go side.
// NOTE(review): the scalar tail issues MOVW/MOVH at whatever alignment the
// cursors have at that point - confirm unaligned access is acceptable.
TEXT ·xorBytesNEON32(SB), NOSPLIT|NOFRAME, $0
	MOVW	dst+0(FP), R0
	MOVW	a+4(FP), R1
	MOVW	b+8(FP), R2
	MOVW	n+12(FP), R3
	// Signed compare: fewer than 32 bytes (including n <= 0) skips the loop.
	CMP	$32, R3
	BLT	less_than32

	// Main loop: 32 bytes per iteration (two q registers from each source).
loop_32:
	WORD	$0xF421020D // vld1.u8 {q0, q1}, [r1]!
	WORD	$0xF422420D // vld1.u8 {q2, q3}, [r2]!
	WORD	$0xF3004154 // veor q2, q0, q2
	WORD	$0xF3026156 // veor q3, q1, q3
	WORD	$0xF400420D // vst1.u8 {q2, q3}, [r0]!

	SUB	$32, R3
	CMP	$32, R3
	BGE	loop_32

	// Here R3 < 32: optional 16-byte piece (one q register per source).
less_than32:
	CMP	$16, R3
	BLT	less_than16
	WORD	$0xF4210A0D // vld1.u8 q0, [r1]!
	WORD	$0xF4222A0D // vld1.u8 q1, [r2]!
	WORD	$0xF3002152 // veor q1, q0, q1
	WORD	$0xF4002A0D // vst1.u8 {q1}, [r0]!

	SUB	$16, R3

	// Here R3 < 16: optional 8-byte piece (one d register per source).
less_than16:
	CMP	$8, R3
	BLT	less_than8
	WORD	$0xF421070D // vld1.u8 d0, [r1]!
	WORD	$0xF422170D // vld1.u8 d1, [r2]!
	WORD	$0xF3001111 // veor d1, d0, d1
	WORD	$0xF400170D // vst1.u8 {d1}, [r0]!

	SUB	$8, R3

	// Here R3 < 8: the rest is handled with core integer instructions.
less_than8:
	CMP	$4, R3
	BLT	less_than4
	MOVW.P	4(R1), R4
	MOVW.P	4(R2), R5
	EOR	R4, R5, R5
	MOVW.P	R5, 4(R0)

	SUB	$4, R3

	// Here R3 < 4: optional 2-byte piece.
less_than4:
	CMP	$2, R3
	BLT	less_than2
	MOVH.P	2(R1), R4
	MOVH.P	2(R2), R5
	EOR	R4, R5, R5
	MOVH.P	R5, 2(R0)	// only the low halfword of R5 is written

	SUB	$2, R3

	// Here R3 < 2: at most one byte is left.
less_than2:
	CMP	$0, R3
	// BLE rather than BEQ: a negative n reaches this point untouched by the
	// signed checks above and must not fall into the single-byte path.
	BLE	end
	MOVB	(R1), R4
	MOVB	(R2), R5
	EOR	R4, R5, R5
	MOVB	R5, (R0)
end:
	RET