1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
|
/* XzCrc64Opt.c -- CRC64 calculation (optimized functions)
2023-12-08 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1
// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu
// #define Z7_CRC64_DEBUG_BE
#ifdef Z7_CRC64_DEBUG_BE
#undef MY_CPU_LE
#define MY_CPU_BE
#endif
#if defined(MY_CPU_64BIT)
#define Z7_CRC64_USE_64BIT
#endif
// the value Z7_CRC64_NUM_TABLES_USE must be defined to same value as in XzCrc64.c
#ifdef Z7_CRC64_NUM_TABLES
#define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES
#else
#define Z7_CRC64_NUM_TABLES_USE 12
#endif
#if Z7_CRC64_NUM_TABLES_USE % 4 || \
Z7_CRC64_NUM_TABLES_USE < 4 || \
Z7_CRC64_NUM_TABLES_USE > 4 * 4
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
#ifndef MY_CPU_BE
#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)
#define Q64LE(n, d) \
( (table + ((n) * 8 + 7) * 0x100)[((d) ) & 0xFF] \
^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 3 * 8) & 0xFF] \
^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 4 * 8) & 0xFF] \
^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 5 * 8) & 0xFF] \
^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 6 * 8) & 0xFF] \
^ (table + ((n) * 8 + 0) * 0x100)[((d) >> 7 * 8)] )
#define R64(a) *((const UInt64 *)(const void *)p + (a))
#else
#define Q32LE(n, d) \
( (table + ((n) * 4 + 3) * 0x100)[((d) ) & 0xFF] \
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] )
#define R32(a) *((const UInt32 *)(const void *)p + (a))
#endif
#define CRC64_FUNC_PRE_LE2(step) \
UInt64 Z7_FASTCALL XzCrc64UpdateT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)
#define CRC64_FUNC_PRE_LE(step) \
CRC64_FUNC_PRE_LE2(step); \
CRC64_FUNC_PRE_LE2(step)
CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE)
{
const Byte *p = (const Byte *)data;
const Byte *lim;
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
v = CRC64_UPDATE_BYTE_2(v, *p);
lim = p + size;
if (size >= Z7_CRC64_NUM_TABLES_USE)
{
lim -= Z7_CRC64_NUM_TABLES_USE;
do
{
#if Z7_CRC64_NUM_TABLES_USE == 4
const UInt32 d = (UInt32)v ^ R32(0);
v = (v >> 32) ^ Q32LE(0, d);
#elif Z7_CRC64_NUM_TABLES_USE == 8
#ifdef Z7_CRC64_USE_64BIT
v ^= R64(0);
v = Q64LE(0, v);
#else
UInt32 v0, v1;
v0 = (UInt32)v ^ R32(0);
v1 = (UInt32)(v >> 32) ^ R32(1);
v = Q32LE(1, v0) ^ Q32LE(0, v1);
#endif
#elif Z7_CRC64_NUM_TABLES_USE == 12
UInt32 w;
UInt32 v0, v1;
v0 = (UInt32)v ^ R32(0);
v1 = (UInt32)(v >> 32) ^ R32(1);
w = R32(2);
v = Q32LE(0, w);
v ^= Q32LE(2, v0) ^ Q32LE(1, v1);
#elif Z7_CRC64_NUM_TABLES_USE == 16
#ifdef Z7_CRC64_USE_64BIT
UInt64 w;
UInt64 x;
w = R64(1); x = Q64LE(0, w);
v ^= R64(0); v = x ^ Q64LE(1, v);
#else
UInt32 v0, v1;
UInt32 r0, r1;
v0 = (UInt32)v ^ R32(0);
v1 = (UInt32)(v >> 32) ^ R32(1);
r0 = R32(2);
r1 = R32(3);
v = Q32LE(1, r0) ^ Q32LE(0, r1);
v ^= Q32LE(3, v0) ^ Q32LE(2, v1);
#endif
#else
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
p += Z7_CRC64_NUM_TABLES_USE;
}
while (p <= lim);
lim += Z7_CRC64_NUM_TABLES_USE;
}
for (; p < lim; p++)
v = CRC64_UPDATE_BYTE_2(v, *p);
return v;
}
#undef CRC64_UPDATE_BYTE_2
#undef R32
#undef R64
#undef Q32LE
#undef Q64LE
#undef CRC64_FUNC_PRE_LE
#undef CRC64_FUNC_PRE_LE2
#endif
#ifndef MY_CPU_LE
#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 56) ^ (b)] ^ ((crc) << 8))
#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)
#define Q64BE(n, d) \
( (table + ((n) * 8 + 0) * 0x100)[(Byte)(d)] \
^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 3 * 8) & 0xFF] \
^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 4 * 8) & 0xFF] \
^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 5 * 8) & 0xFF] \
^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 6 * 8) & 0xFF] \
^ (table + ((n) * 8 + 7) * 0x100)[((d) >> 7 * 8)] )
#ifdef Z7_CRC64_DEBUG_BE
#define R64BE(a) GetBe64a((const UInt64 *)(const void *)p + (a))
#else
#define R64BE(a) *((const UInt64 *)(const void *)p + (a))
#endif
#else
#define Q32BE(n, d) \
( (table + ((n) * 4 + 0) * 0x100)[(Byte)(d)] \
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] )
#ifdef Z7_CRC64_DEBUG_BE
#define R32BE(a) GetBe32a((const UInt32 *)(const void *)p + (a))
#else
#define R32BE(a) *((const UInt32 *)(const void *)p + (a))
#endif
#endif
#define CRC64_FUNC_PRE_BE2(step) \
UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)
#define CRC64_FUNC_PRE_BE(step) \
CRC64_FUNC_PRE_BE2(step); \
CRC64_FUNC_PRE_BE2(step)
CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE)
{
const Byte *p = (const Byte *)data;
const Byte *lim;
v = Z7_BSWAP64(v);
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
v = CRC64_UPDATE_BYTE_2_BE(v, *p);
lim = p + size;
if (size >= Z7_CRC64_NUM_TABLES_USE)
{
lim -= Z7_CRC64_NUM_TABLES_USE;
do
{
#if Z7_CRC64_NUM_TABLES_USE == 4
const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0);
v = (v << 32) ^ Q32BE(0, d);
#elif Z7_CRC64_NUM_TABLES_USE == 12
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
const UInt32 w = R32BE(2);
v = Q32BE(0, w);
v ^= Q32BE(2, d1) ^ Q32BE(1, d0);
#elif Z7_CRC64_NUM_TABLES_USE == 8
#ifdef Z7_CRC64_USE_64BIT
v ^= R64BE(0);
v = Q64BE(0, v);
#else
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
v = Q32BE(1, d1) ^ Q32BE(0, d0);
#endif
#elif Z7_CRC64_NUM_TABLES_USE == 16
#ifdef Z7_CRC64_USE_64BIT
const UInt64 w = R64BE(1);
v ^= R64BE(0);
v = Q64BE(0, w) ^ Q64BE(1, v);
#else
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
const UInt32 w1 = R32BE(2);
const UInt32 w0 = R32BE(3);
v = Q32BE(1, w1) ^ Q32BE(0, w0);
v ^= Q32BE(3, d1) ^ Q32BE(2, d0);
#endif
#elif
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
p += Z7_CRC64_NUM_TABLES_USE;
}
while (p <= lim);
lim += Z7_CRC64_NUM_TABLES_USE;
}
for (; p < lim; p++)
v = CRC64_UPDATE_BYTE_2_BE(v, *p);
return Z7_BSWAP64(v);
}
#undef CRC64_UPDATE_BYTE_2_BE
#undef R32BE
#undef R64BE
#undef Q32BE
#undef Q64BE
#undef CRC64_FUNC_PRE_BE
#undef CRC64_FUNC_PRE_BE2
#endif
#undef Z7_CRC64_NUM_TABLES_USE
#endif
|