#![warn(unsafe_op_in_unsafe_fn)]
#[cfg(target_arch = "x86_64")]
mod avx2;
mod generic;
#[cfg(target_arch = "aarch64")]
mod neon;
#[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))]
mod wasm;
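
/// Computes the Adler-32 checksum of `data`, resuming from `start_checksum`
/// (use `1` for a fresh checksum). On x86_64, aarch64, and wasm targets this
/// dispatches to a SIMD implementation when the required CPU features are
/// detected at runtime, and falls back to the portable Rust implementation
/// otherwise.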
pub fn adler32(start_checksum: u32, data: &[u8]) -> u32 {
    #[cfg(target_arch = "x86_64")]
    if crate::cpu_features::is_enabled_avx2() {
        return avx2::adler32_avx2(start_checksum, data);
    }

    #[cfg(target_arch = "aarch64")]
    if crate::cpu_features::is_enabled_neon() {
        return self::neon::adler32_neon(start_checksum, data);
    }

    #[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))]
    if crate::cpu_features::is_enabled_simd128() {
        return self::wasm::adler32_wasm(start_checksum, data);
    }

    generic::adler32_rust(start_checksum, data)
}
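
/// Copies `src` into the front of `dst` and returns the Adler-32 checksum of
/// `src`, resuming from `start_checksum`. `dst` must be at least as long as
/// `src`.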
pub fn adler32_fold_copy(start_checksum: u32, dst: &mut [u8], src: &[u8]) -> u32 {
    debug_assert!(dst.len() >= src.len(), "{} < {}", dst.len(), src.len());

    // Integrating the memcpy into the adler32 function did not have any
    // benefits, and in fact was a bit slower for very small chunk sizes.
    dst[..src.len()].copy_from_slice(src);
    adler32(start_checksum, src)
}
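
/// Combines two Adler-32 checksums: given `adler1` over a byte sequence `x`
/// and `adler2` over a byte sequence `y` of length `len2`, returns the
/// checksum of the concatenation of `x` and `y`, without re-reading the data.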
pub fn adler32_combine(adler1: u32, adler2: u32, len2: u64) -> u32 {
    const BASE: u64 = self::BASE as u64;

    let rem = len2 % BASE;

    let adler1 = adler1 as u64;
    let adler2 = adler2 as u64;

    /* the derivation of this formula is left as an exercise for the reader */
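    //
    // One way to see it: write each checksum as (s2 << 16) | s1. Running the
    // hash over `y` starting from `adler1` instead of from the initial value 1
    // gives
    //   s1' = s1(x) + s1(y) - 1                   (mod BASE)
    //   s2' = s2(x) + s2(y) + len2 * (s1(x) - 1)  (mod BASE)
    // since s1 starts at 1, and each of the `len2` bytes of `y` folds the s1
    // offset into s2 once more. The `+ BASE` terms below keep the unsigned
    // intermediates from underflowing, and the conditional subtractions then
    // complete the reduction modulo BASE.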
    let mut sum1 = adler1 & 0xffff;
    let mut sum2 = rem * sum1;
    sum2 %= BASE;
    sum1 += (adler2 & 0xffff) + BASE - 1;
    sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;

    if sum1 >= BASE {
        sum1 -= BASE;
    }
    if sum1 >= BASE {
        sum1 -= BASE;
    }
    if sum2 >= (BASE << 1) {
        sum2 -= BASE << 1;
    }
    if sum2 >= BASE {
        sum2 -= BASE;
    }

    (sum1 | (sum2 << 16)) as u32
}

// inefficient but correct, useful for testing
#[cfg(test)]
fn naive_adler32(start_checksum: u32, data: &[u8]) -> u32 {
    const MOD_ADLER: u32 = 65521; // largest prime smaller than 2^16

    let mut a = start_checksum & 0xFFFF;
    let mut b = (start_checksum >> 16) & 0xFFFF;

    for &byte in data {
        a = (a + byte as u32) % MOD_ADLER;
        b = (b + a) % MOD_ADLER;
    }

    (b << 16) | a
}

const BASE: u32 = 65521; /* largest prime smaller than 65536 */

// The largest n such that 255 * n * (n + 1) / 2 + (n + 1) * (BASE - 1) still
// fits in a u32: up to NMAX bytes can be accumulated before the running sums
// must be reduced modulo BASE.
const NMAX: u32 = 5552;

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn naive_is_fancy_small_inputs() {
        for i in 0..128 {
            let v = (0u8..i).collect::<Vec<_>>();
            assert_eq!(naive_adler32(1, &v), generic::adler32_rust(1, &v));
        }
    }

    #[test]
    fn test_adler32_combine() {
        ::quickcheck::quickcheck(test as fn(_) -> _);

        fn test(data: Vec<u8>) -> bool {
            // Use the first byte of the input to pick a (non-zero) chunk size.
            let Some(buf_len) = data.first().copied() else {
                return true;
            };

            let buf_size = Ord::max(buf_len, 1) as usize;

            let mut adler1 = 1;
            let mut adler2 = 1;

            for chunk in data.chunks(buf_size) {
                adler1 = adler32(adler1, chunk);
            }

            adler2 = adler32(adler2, &data);

            // Hashing in chunks must agree with hashing in one shot.
            assert_eq!(adler1, adler2);

            // adler1 == adler2 at this point, so both calls must agree.
            let combine1 = adler32_combine(adler1, adler2, data.len() as _);
            let combine2 = adler32_combine(adler1, adler1, data.len() as _);
            assert_eq!(combine1, combine2);

            true
        }
    }
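
    // A sketch of a more direct property, assuming only the documented
    // semantics of `adler32_combine`: combining the checksums of the two
    // halves of an input must reproduce the checksum of the whole input.
    #[test]
    fn combine_matches_whole_input() {
        ::quickcheck::quickcheck(test as fn(_) -> _);

        fn test(data: Vec<u8>) -> bool {
            let mid = data.len() / 2;

            let whole = adler32(1, &data);
            let left = adler32(1, &data[..mid]);
            let right = adler32(1, &data[mid..]);

            let combined = adler32_combine(left, right, (data.len() - mid) as u64);
            assert_eq!(combined, whole);

            true
        }
    }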
}