1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
|
/*
* lz77.c: common LZ77 compression code between Deflate and LZX.
*/
#include "halibut.h" /* only for snew, sfree etc */
#include "lz77.h"
/*
* Modifiable parameters.
*/
#define HASHMAX 2039 /* one more than max hash value */
#define MAXMATCH 32 /* how many matches we track */
#define HASHCHARS 3 /* how many chars make a hash */
/*
* This compressor takes a less slapdash approach than the
* gzip/zlib one. Rather than allowing our hash chains to fall into
* disuse near the far end, we keep them doubly linked so we can
* _find_ the far end, and then every time we add a new byte to the
* window (thus rolling round by one and removing the previous
* byte), we can carefully remove the hash chain entry.
*/
#define INVALID -1 /* invalid hash _and_ invalid offset */
struct WindowEntry {
short next, prev; /* array indices within the window */
short hashval;
};
struct HashEntry {
short first; /* window index of first in chain */
};
struct Match {
int distance, len;
};
struct LZ77InternalContext {
int winsize;
struct WindowEntry *win; /* [winsize] */
unsigned char *data; /* [winsize] */
int winpos;
struct HashEntry hashtab[HASHMAX];
unsigned char pending[HASHCHARS];
int npending;
};
static int lz77_hash(const unsigned char *data)
{
return (257 * data[0] + 263 * data[1] + 269 * data[2]) % HASHMAX;
}
void lz77_init(struct LZ77Context *ctx, int winsize)
{
struct LZ77InternalContext *st;
int i;
st = snew(struct LZ77InternalContext);
ctx->ictx = st;
st->winsize = winsize;
st->win = snewn(st->winsize, struct WindowEntry);
st->data = snewn(st->winsize, unsigned char);
for (i = 0; i < st->winsize; i++)
st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
for (i = 0; i < HASHMAX; i++)
st->hashtab[i].first = INVALID;
st->winpos = 0;
st->npending = 0;
}
void lz77_cleanup(struct LZ77Context *ctx)
{
struct LZ77InternalContext *st = ctx->ictx;
sfree(st->win);
sfree(st->data);
sfree(st);
}
static void lz77_advance(struct LZ77InternalContext *st,
unsigned char c, int hash)
{
int off;
/*
* Remove the hash entry at winpos from the tail of its chain,
* or empty the chain if it's the only thing on the chain.
*/
if (st->win[st->winpos].prev != INVALID) {
st->win[st->win[st->winpos].prev].next = INVALID;
} else if (st->win[st->winpos].hashval != INVALID) {
st->hashtab[st->win[st->winpos].hashval].first = INVALID;
}
/*
* Create a new entry at winpos and add it to the head of its
* hash chain.
*/
st->win[st->winpos].hashval = hash;
st->win[st->winpos].prev = INVALID;
off = st->win[st->winpos].next = st->hashtab[hash].first;
st->hashtab[hash].first = st->winpos;
if (off != INVALID)
st->win[off].prev = st->winpos;
st->data[st->winpos] = c;
/*
* Advance the window pointer.
*/
st->winpos = (st->winpos + 1) % st->winsize;
}
#define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)%st->winsize] : data[k] )
void lz77_compress(struct LZ77Context *ctx,
const unsigned char *data, int len, bool compress)
{
struct LZ77InternalContext *st = ctx->ictx;
int i, hash, distance, off, nmatch, matchlen, advance;
struct Match defermatch, matches[MAXMATCH];
int deferchr;
/*
* Add any pending characters from last time to the window. (We
* might not be able to.)
*/
for (i = 0; i < st->npending; i++) {
unsigned char foo[HASHCHARS];
int j;
if (len + st->npending - i < HASHCHARS) {
/* Update the pending array. */
for (j = i; j < st->npending; j++)
st->pending[j - i] = st->pending[j];
break;
}
for (j = 0; j < HASHCHARS; j++)
foo[j] = (i + j < st->npending ? st->pending[i + j] :
data[i + j - st->npending]);
lz77_advance(st, foo[0], lz77_hash(foo));
}
st->npending -= i;
defermatch.distance = defermatch.len = 0;
deferchr = '\0';
while (len > 0) {
/* Don't even look for a match, if we're not compressing. */
if (compress && len >= HASHCHARS) {
/*
* Hash the next few characters.
*/
hash = lz77_hash(data);
/*
* Look the hash up in the corresponding hash chain and see
* what we can find.
*/
nmatch = 0;
for (off = st->hashtab[hash].first;
off != INVALID; off = st->win[off].next) {
/* distance = 1 if off == st->winpos-1 */
/* distance = winsize if off == st->winpos */
distance = st->winsize -
(off + st->winsize - st->winpos) % st->winsize;
for (i = 0; i < HASHCHARS; i++)
if (CHARAT(i) != CHARAT(i - distance))
break;
if (i == HASHCHARS) {
matches[nmatch].distance = distance;
matches[nmatch].len = 3;
if (++nmatch >= MAXMATCH)
break;
}
}
} else {
nmatch = 0;
hash = INVALID;
}
if (nmatch > 0) {
/*
* We've now filled up matches[] with nmatch potential
* matches. Follow them down to find the longest. (We
* assume here that it's always worth favouring a
* longer match over a shorter one.)
*/
matchlen = HASHCHARS;
while (matchlen < len) {
int j;
for (i = j = 0; i < nmatch; i++) {
if (CHARAT(matchlen) ==
CHARAT(matchlen - matches[i].distance)) {
matches[j++] = matches[i];
}
}
if (j == 0)
break;
matchlen++;
nmatch = j;
}
/*
* We've now got all the longest matches. We favour the
* shorter distances, which means we go with matches[0].
* So see if we want to defer it or throw it away.
*/
matches[0].len = matchlen;
if (defermatch.len > 0) {
if (matches[0].len > defermatch.len + 1) {
/* We have a better match. Emit the deferred char,
* and defer this match. */
ctx->literal(ctx, (unsigned char) deferchr);
defermatch = matches[0];
deferchr = data[0];
advance = 1;
} else {
/* We don't have a better match. Do the deferred one. */
ctx->match(ctx, defermatch.distance, defermatch.len);
advance = defermatch.len - 1;
defermatch.len = 0;
}
} else {
/* There was no deferred match. Defer this one. */
defermatch = matches[0];
deferchr = data[0];
advance = 1;
}
} else {
/*
* We found no matches. Emit the deferred match, if
* any; otherwise emit a literal.
*/
if (defermatch.len > 0) {
ctx->match(ctx, defermatch.distance, defermatch.len);
advance = defermatch.len - 1;
defermatch.len = 0;
} else {
ctx->literal(ctx, data[0]);
advance = 1;
}
}
/*
* Now advance the position by `advance' characters,
* keeping the window and hash chains consistent.
*/
while (advance > 0) {
if (len >= HASHCHARS) {
lz77_advance(st, *data, lz77_hash(data));
} else {
st->pending[st->npending++] = *data;
}
data++;
len--;
advance--;
}
}
}
|