File: crc32_loong64.s

package info (click to toggle)
golang-github-klauspost-crc32 1.3.0-1
links: PTS, VCS
area: main
in suites: sid
size: 316 kB
sloc: asm: 2,849; makefile: 2
file content (160 lines) | stat: -rw-r--r-- 3,229 bytes
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// castagnoliUpdate folds every byte of p into crc with the CRCC.W.*
// (Castagnoli polynomial) instructions and returns the result. The crc
// is consumed and produced non-inverted; this routine applies no
// complement on entry or exit.
//
// Register roles:
//   R4   running CRC (also the return value)
//   R5   read cursor into p
//   R6   number of bytes not yet consumed
//   R12  scratch: comparison results and the alignment byte count
//   R13  scratch: bit tests and data loaded from p
//
// func castagnoliUpdate(crc uint32, p []byte) uint32
TEXT ·castagnoliUpdate(SB), NOSPLIT, $0-36
	MOVWU crc+0(FP), R4    // R4 = crc
	MOVV  p+8(FP), R5      // R5 = address of p[0]
	MOVV  p_len+16(FP), R6 // R6 = len(p)

	// Fewer than 8 bytes in total: skip alignment and the bulk loop.
	SGT $8, R6, R12
	BNE R12, tail_4

	// Cursor already on an 8-byte boundary: nothing to peel off.
	AND $7, R5, R12
	BEQ R12, bulk

	// R12 is the misalignment (1..7). Convert it to the number of
	// leading bytes to consume, 8 - R12, computed as (R12 - 1) ^ 7.
	// Bits 0, 1 and 2 of that count select a 1-, 2- and 4-byte step.
	SUB $1, R12
	XOR $7, R12

	// Count bit 0: consume a single byte.
	AND     $1, R12, R13
	BEQ     R13, head_2
	MOVB    (R5), R13
	CRCCWBW R4, R13, R4
	ADDV    $-1, R6
	ADDV    $1, R5

head_2:
	// Count bit 1: consume a halfword.
	AND     $2, R12, R13
	BEQ     R13, head_4
	MOVH    (R5), R13
	CRCCWHW R4, R13, R4
	ADDV    $-2, R6
	ADDV    $2, R5

head_4:
	// Count bit 2: consume a word.
	AND     $4, R12, R13
	BEQ     R13, bulk
	MOVW    (R5), R13
	CRCCWWW R4, R13, R4
	ADDV    $-4, R6
	ADDV    $4, R5

bulk:
	// R5 is 8-byte aligned here. Consume doublewords for as long as at
	// least 8 bytes remain.
	SGT     $8, R6, R12
	BNE     R12, tail_4
	MOVV    (R5), R13
	CRCCWVW R4, R13, R4
	ADDV    $-8, R6
	ADDV    $8, R5
	JMP     bulk

tail_4:
	// R6 < 8 here. Its bits 2, 1 and 0 say whether a word, a halfword
	// and a byte are still pending, in that order.
	AND     $4, R6, R12
	BEQ     R12, tail_2
	MOVW    (R5), R13
	CRCCWWW R4, R13, R4
	ADDV    $-4, R6
	ADDV    $4, R5

tail_2:
	AND     $2, R6, R12
	BEQ     R12, tail_1
	MOVH    (R5), R13
	CRCCWHW R4, R13, R4
	ADDV    $-2, R6
	ADDV    $2, R5

tail_1:
	// At most one byte is left at this point.
	BEQ     R6, out
	MOVB    (R5), R13
	CRCCWBW R4, R13, R4

out:
	MOVW R4, ret+32(FP)
	RET

// ieeeUpdate folds every byte of p into crc with the CRC.W.* (IEEE
// polynomial) instructions and returns the result. The crc is consumed
// and produced non-inverted; this routine applies no complement on
// entry or exit.
//
// Register roles:
//   R4   running CRC (also the return value)
//   R5   read cursor into p
//   R6   number of bytes not yet consumed
//   R12  scratch: comparison results and the alignment byte count
//   R13  scratch: bit tests and data loaded from p
//
// func ieeeUpdate(crc uint32, p []byte) uint32
TEXT ·ieeeUpdate(SB), NOSPLIT, $0-36
	MOVWU crc+0(FP), R4    // R4 = crc
	MOVV  p+8(FP), R5      // R5 = address of p[0]
	MOVV  p_len+16(FP), R6 // R6 = len(p)

	// Fewer than 8 bytes in total: skip alignment and the bulk loop.
	SGT $8, R6, R12
	BNE R12, tail_4

	// Cursor already on an 8-byte boundary: nothing to peel off.
	AND $7, R5, R12
	BEQ R12, bulk

	// R12 is the misalignment (1..7). Convert it to the number of
	// leading bytes to consume, 8 - R12, computed as (R12 - 1) ^ 7.
	// Bits 0, 1 and 2 of that count select a 1-, 2- and 4-byte step.
	SUB $1, R12
	XOR $7, R12

	// Count bit 0: consume a single byte.
	AND    $1, R12, R13
	BEQ    R13, head_2
	MOVB   (R5), R13
	CRCWBW R4, R13, R4
	ADDV   $-1, R6
	ADDV   $1, R5

head_2:
	// Count bit 1: consume a halfword.
	AND    $2, R12, R13
	BEQ    R13, head_4
	MOVH   (R5), R13
	CRCWHW R4, R13, R4
	ADDV   $-2, R6
	ADDV   $2, R5

head_4:
	// Count bit 2: consume a word.
	AND    $4, R12, R13
	BEQ    R13, bulk
	MOVW   (R5), R13
	CRCWWW R4, R13, R4
	ADDV   $-4, R6
	ADDV   $4, R5

bulk:
	// R5 is 8-byte aligned here. Consume doublewords for as long as at
	// least 8 bytes remain.
	SGT    $8, R6, R12
	BNE    R12, tail_4
	MOVV   (R5), R13
	CRCWVW R4, R13, R4
	ADDV   $-8, R6
	ADDV   $8, R5
	JMP    bulk

tail_4:
	// R6 < 8 here. Its bits 2, 1 and 0 say whether a word, a halfword
	// and a byte are still pending, in that order.
	AND    $4, R6, R12
	BEQ    R12, tail_2
	MOVW   (R5), R13
	CRCWWW R4, R13, R4
	ADDV   $-4, R6
	ADDV   $4, R5

tail_2:
	AND    $2, R6, R12
	BEQ    R12, tail_1
	MOVH   (R5), R13
	CRCWHW R4, R13, R4
	ADDV   $-2, R6
	ADDV   $2, R5

tail_1:
	// At most one byte is left at this point.
	BEQ    R6, out
	MOVB   (R5), R13
	CRCWBW R4, R13, R4

out:
	MOVW R4, ret+32(FP)
	RET