File: crc32_loong64.s

package info (click to toggle)
golang-github-klauspost-crc32 1.3.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 316 kB
  • sloc: asm: 2,849; makefile: 2
file content (160 lines) | stat: -rw-r--r-- 3,229 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// castagnoliUpdate folds the bytes of p into a running CRC-32C (Castagnoli)
// value. Both the crc argument and the result are in non-inverted form.
//
// Register roles:
//   R4  running CRC (also the value returned)
//   R5  read cursor into p
//   R6  number of bytes not yet consumed
//   R12 condition flag / alignment count scratch
//   R13 data most recently loaded from p

// func castagnoliUpdate(crc uint32, p []byte) uint32
TEXT ·castagnoliUpdate(SB), NOSPLIT, $0-36
	MOVV  p_len+16(FP), R6 // R6 = len(p)
	MOVV  p+8(FP), R5      // R5 = address of p's first byte
	MOVWU crc+0(FP), R4    // R4 = incoming CRC

	// Inputs shorter than 8 bytes go straight to the tail handling.
	SGT $8, R6, R12
	BNE R12, tail

	// No head work is needed if the cursor already sits on an 8-byte boundary.
	AND $7, R5, R12
	BEQ R12, bulk

	// R12 holds the misalignment m (1..7). Convert it to 8-m, the number of
	// leading bytes to consume; (m-1)^7 equals 8-m over that range.
	SUB $1, R12
	XOR $7, R12

	// Bit 0 of the head count: consume one byte.
	AND     $1, R12, R13
	BEQ     R13, head_half
	MOVB    (R5), R13
	ADDV    $-1, R6
	ADDV    $1, R5
	CRCCWBW R4, R13, R4

head_half:
	// Bit 1 of the head count: consume two bytes.
	AND     $2, R12, R13
	BEQ     R13, head_word
	MOVH    (R5), R13
	ADDV    $-2, R6
	ADDV    $2, R5
	CRCCWHW R4, R13, R4

head_word:
	// Bit 2 of the head count: consume four bytes.
	AND     $4, R12, R13
	BEQ     R13, bulk
	MOVW    (R5), R13
	ADDV    $-4, R6
	ADDV    $4, R5
	CRCCWWW R4, R13, R4

bulk:
	// The cursor is 8-byte aligned from here on. Skip the loop entirely
	// when fewer than 8 bytes are left.
	SGT $8, R6, R12
	BNE R12, tail

loop8:
	// Consume 8 bytes per iteration while at least 8 remain.
	MOVV    (R5), R13
	ADDV    $-8, R6
	ADDV    $8, R5
	CRCCWVW R4, R13, R4
	SGT     $8, R6, R12
	BEQ     R12, loop8

tail:
	// At most 7 bytes remain; bits 2, 1 and 0 of R6 select a 4-byte,
	// 2-byte and 1-byte step respectively.
	AND     $4, R6, R12
	BEQ     R12, tail_half
	MOVW    (R5), R13
	ADDV    $-4, R6
	ADDV    $4, R5
	CRCCWWW R4, R13, R4

tail_half:
	AND     $2, R6, R12
	BEQ     R12, tail_byte
	MOVH    (R5), R13
	ADDV    $-2, R6
	ADDV    $2, R5
	CRCCWHW R4, R13, R4

tail_byte:
	// After the two steps above R6 is either 0 or 1.
	BEQ     R6, finish
	MOVB    (R5), R13
	CRCCWBW R4, R13, R4

finish:
	MOVW R4, ret+32(FP)
	RET

// ieeeUpdate folds the bytes of p into a running CRC-32 (IEEE) value.
// Both the crc argument and the result are in non-inverted form.
//
// Register roles:
//   R4  running CRC (also the value returned)
//   R5  read cursor into p
//   R6  number of bytes not yet consumed
//   R12 condition flag / alignment count scratch
//   R13 data most recently loaded from p

// func ieeeUpdate(crc uint32, p []byte) uint32
TEXT ·ieeeUpdate(SB), NOSPLIT, $0-36
	MOVV  p_len+16(FP), R6 // R6 = len(p)
	MOVV  p+8(FP), R5      // R5 = address of p's first byte
	MOVWU crc+0(FP), R4    // R4 = incoming CRC

	// Inputs shorter than 8 bytes go straight to the tail handling.
	SGT $8, R6, R12
	BNE R12, tail

	// No head work is needed if the cursor already sits on an 8-byte boundary.
	AND $7, R5, R12
	BEQ R12, bulk

	// R12 holds the misalignment m (1..7). Convert it to 8-m, the number of
	// leading bytes to consume; (m-1)^7 equals 8-m over that range.
	SUB $1, R12
	XOR $7, R12

	// Bit 0 of the head count: consume one byte.
	AND    $1, R12, R13
	BEQ    R13, head_half
	MOVB   (R5), R13
	ADDV   $-1, R6
	ADDV   $1, R5
	CRCWBW R4, R13, R4

head_half:
	// Bit 1 of the head count: consume two bytes.
	AND    $2, R12, R13
	BEQ    R13, head_word
	MOVH   (R5), R13
	ADDV   $-2, R6
	ADDV   $2, R5
	CRCWHW R4, R13, R4

head_word:
	// Bit 2 of the head count: consume four bytes.
	AND    $4, R12, R13
	BEQ    R13, bulk
	MOVW   (R5), R13
	ADDV   $-4, R6
	ADDV   $4, R5
	CRCWWW R4, R13, R4

bulk:
	// The cursor is 8-byte aligned from here on. Skip the loop entirely
	// when fewer than 8 bytes are left.
	SGT $8, R6, R12
	BNE R12, tail

loop8:
	// Consume 8 bytes per iteration while at least 8 remain.
	MOVV   (R5), R13
	ADDV   $-8, R6
	ADDV   $8, R5
	CRCWVW R4, R13, R4
	SGT    $8, R6, R12
	BEQ    R12, loop8

tail:
	// At most 7 bytes remain; bits 2, 1 and 0 of R6 select a 4-byte,
	// 2-byte and 1-byte step respectively.
	AND    $4, R6, R12
	BEQ    R12, tail_half
	MOVW   (R5), R13
	ADDV   $-4, R6
	ADDV   $4, R5
	CRCWWW R4, R13, R4

tail_half:
	AND    $2, R6, R12
	BEQ    R12, tail_byte
	MOVH   (R5), R13
	ADDV   $-2, R6
	ADDV   $2, R5
	CRCWHW R4, R13, R4

tail_byte:
	// After the two steps above R6 is either 0 or 1.
	BEQ    R6, finish
	MOVB   (R5), R13
	CRCWBW R4, R13, R4

finish:
	MOVW R4, ret+32(FP)
	RET