1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
/*
* adler32.c - Adler-32 checksum algorithm
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "lib_common.h"
/* The Adler-32 divisor, or "base", value */
#define DIVISOR 65521
/*
* MAX_CHUNK_LEN is the most bytes that can be processed without the possibility
* of s2 overflowing when it is represented as an unsigned 32-bit integer. This
* value was computed using the following Python script:
*
* divisor = 65521
* count = 0
* s1 = divisor - 1
* s2 = divisor - 1
* while True:
* s1 += 0xFF
* s2 += s1
* if s2 > 0xFFFFFFFF:
* break
* count += 1
* print(count)
*
* Note that to get the correct worst-case value, we must assume that every byte
* has value 0xFF and that s1 and s2 started with the highest possible values
* modulo the divisor.
*/
#define MAX_CHUNK_LEN 5552
/*
* Update the Adler-32 values s1 and s2 using n bytes from p, update p to p + n,
* update n to 0, and reduce s1 and s2 mod DIVISOR. It is assumed that neither
* s1 nor s2 can overflow before the reduction at the end, i.e. n plus any bytes
* already processed after the last reduction must not exceed MAX_CHUNK_LEN.
*
* This uses only portable C code. This is used as a fallback when a vectorized
* implementation of Adler-32 (e.g. AVX2) is unavailable on the platform.
*
* Some of the vectorized implementations also use this to handle the end of the
* data when the data isn't evenly divisible by the length the vectorized code
* works on. To avoid compiler errors about target-specific option mismatches
* when this is used in that way, this is a macro rather than a function.
*
* Although this is unvectorized, this does include an optimization where the
* main loop processes four bytes at a time using a strategy similar to that
* used by vectorized implementations. This provides increased instruction-
* level parallelism compared to the traditional 's1 += *p++; s2 += s1;'.
*/
#define ADLER32_CHUNK(s1, s2, p, n) \
do { \
if (n >= 4) { \
u32 s1_sum = 0; \
u32 byte_0_sum = 0; \
u32 byte_1_sum = 0; \
u32 byte_2_sum = 0; \
u32 byte_3_sum = 0; \
\
do { \
s1_sum += s1; \
s1 += p[0] + p[1] + p[2] + p[3]; \
byte_0_sum += p[0]; \
byte_1_sum += p[1]; \
byte_2_sum += p[2]; \
byte_3_sum += p[3]; \
p += 4; \
n -= 4; \
} while (n >= 4); \
s2 += (4 * (s1_sum + byte_0_sum)) + (3 * byte_1_sum) + \
(2 * byte_2_sum) + byte_3_sum; \
} \
for (; n; n--, p++) { \
s1 += *p; \
s2 += s1; \
} \
s1 %= DIVISOR; \
s2 %= DIVISOR; \
} while (0)
static u32 MAYBE_UNUSED
adler32_generic(u32 adler, const u8 *p, size_t len)
{
u32 s1 = adler & 0xFFFF;
u32 s2 = adler >> 16;
while (len) {
size_t n = MIN(len, MAX_CHUNK_LEN & ~3);
len -= n;
ADLER32_CHUNK(s1, s2, p, n);
}
return (s2 << 16) | s1;
}
/* Include architecture-specific implementation(s) if available. */
#undef DEFAULT_IMPL
#undef arch_select_adler32_func
typedef u32 (*adler32_func_t)(u32 adler, const u8 *p, size_t len);
#if defined(ARCH_ARM32) || defined(ARCH_ARM64)
# include "arm/adler32_impl.h"
#elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
# include "x86/adler32_impl.h"
#endif
#ifndef DEFAULT_IMPL
# define DEFAULT_IMPL adler32_generic
#endif
#ifdef arch_select_adler32_func
static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len);
static volatile adler32_func_t adler32_impl = dispatch_adler32;
/* Choose the best implementation at runtime. */
static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len)
{
adler32_func_t f = arch_select_adler32_func();
if (f == NULL)
f = DEFAULT_IMPL;
adler32_impl = f;
return f(adler, p, len);
}
#else
/* The best implementation is statically known, so call it directly. */
#define adler32_impl DEFAULT_IMPL
#endif
LIBDEFLATEAPI u32
libdeflate_adler32(u32 adler, const void *buffer, size_t len)
{
if (buffer == NULL) /* Return initial value. */
return 1;
return adler32_impl(adler, buffer, len);
}
|