File: adler32.c

package info (click to toggle)
libdeflate 1.23-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,072 kB
  • sloc: ansic: 11,716; sh: 1,388; python: 169; makefile: 31
file content (162 lines) | stat: -rw-r--r-- 5,133 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
/*
 * adler32.c - Adler-32 checksum algorithm
 *
 * Copyright 2016 Eric Biggers
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "lib_common.h"

/* The Adler-32 divisor, or "base", value */
#define DIVISOR 65521

/*
 * MAX_CHUNK_LEN is the most bytes that can be processed without the possibility
 * of s2 overflowing when it is represented as an unsigned 32-bit integer.  This
 * value was computed using the following Python script:
 *
 *	divisor = 65521
 *	count = 0
 *	s1 = divisor - 1
 *	s2 = divisor - 1
 *	while True:
 *		s1 += 0xFF
 *		s2 += s1
 *		if s2 > 0xFFFFFFFF:
 *			break
 *		count += 1
 *	print(count)
 *
 * Note that to get the correct worst-case value, we must assume that every byte
 * has value 0xFF and that s1 and s2 started with the highest possible values
 * modulo the divisor.
 */
#define MAX_CHUNK_LEN	5552

/*
 * Update the Adler-32 values s1 and s2 using n bytes from p, update p to p + n,
 * update n to 0, and reduce s1 and s2 mod DIVISOR.  It is assumed that neither
 * s1 nor s2 can overflow before the reduction at the end, i.e. n plus any bytes
 * already processed after the last reduction must not exceed MAX_CHUNK_LEN.
 *
 * This uses only portable C code.  This is used as a fallback when a vectorized
 * implementation of Adler-32 (e.g. AVX2) is unavailable on the platform.
 *
 * Some of the vectorized implementations also use this to handle the end of the
 * data when the data isn't evenly divisible by the length the vectorized code
 * works on.  To avoid compiler errors about target-specific option mismatches
 * when this is used in that way, this is a macro rather than a function.
 *
 * Although this is unvectorized, this does include an optimization where the
 * main loop processes four bytes at a time using a strategy similar to that
 * used by vectorized implementations.  This provides increased instruction-
 * level parallelism compared to the traditional 's1 += *p++; s2 += s1;'.
 */
#define ADLER32_CHUNK(s1, s2, p, n)					\
do {									\
	if (n >= 4) {							\
		u32 s1_sum = 0;						\
		u32 byte_0_sum = 0;					\
		u32 byte_1_sum = 0;					\
		u32 byte_2_sum = 0;					\
		u32 byte_3_sum = 0;					\
									\
		do {							\
			s1_sum += s1;					\
			s1 += p[0] + p[1] + p[2] + p[3];		\
			byte_0_sum += p[0];				\
			byte_1_sum += p[1];				\
			byte_2_sum += p[2];				\
			byte_3_sum += p[3];				\
			p += 4;						\
			n -= 4;						\
		} while (n >= 4);					\
		s2 += (4 * (s1_sum + byte_0_sum)) + (3 * byte_1_sum) +	\
		      (2 * byte_2_sum) + byte_3_sum;			\
	}								\
	for (; n; n--, p++) {						\
		s1 += *p;						\
		s2 += s1;						\
	}								\
	s1 %= DIVISOR;							\
	s2 %= DIVISOR;							\
} while (0)

static u32 MAYBE_UNUSED
adler32_generic(u32 adler, const u8 *p, size_t len)
{
	u32 s1 = adler & 0xFFFF;
	u32 s2 = adler >> 16;

	while (len) {
		size_t n = MIN(len, MAX_CHUNK_LEN & ~3);

		len -= n;
		ADLER32_CHUNK(s1, s2, p, n);
	}

	return (s2 << 16) | s1;
}

/* Include architecture-specific implementation(s) if available. */
#undef DEFAULT_IMPL
#undef arch_select_adler32_func
typedef u32 (*adler32_func_t)(u32 adler, const u8 *p, size_t len);
#if defined(ARCH_ARM32) || defined(ARCH_ARM64)
#  include "arm/adler32_impl.h"
#elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
#  include "x86/adler32_impl.h"
#endif

#ifndef DEFAULT_IMPL
#  define DEFAULT_IMPL adler32_generic
#endif

#ifdef arch_select_adler32_func
static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len);

static volatile adler32_func_t adler32_impl = dispatch_adler32;

/* Choose the best implementation at runtime. */
static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len)
{
	adler32_func_t f = arch_select_adler32_func();

	if (f == NULL)
		f = DEFAULT_IMPL;

	adler32_impl = f;
	return f(adler, p, len);
}
#else
/* The best implementation is statically known, so call it directly. */
#define adler32_impl DEFAULT_IMPL
#endif

LIBDEFLATEAPI u32
libdeflate_adler32(u32 adler, const void *buffer, size_t len)
{
	if (buffer == NULL) /* Return initial value. */
		return 1;
	return adler32_impl(adler, buffer, len);
}