1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392
|
namespace System.IO.Compression {
using System;
using System.Diagnostics;
internal class FastEncoderWindow {
private byte[] window; // complete bytes window
private int bufPos; // the start index of uncompressed bytes
private int bufEnd; // the end index of uncompressed bytes
// Be very careful about increasing the window size; the code tables will have to
// be updated, since they assume that extra_distance_bits is never larger than a
// certain size.
const int FastEncoderHashShift = 4;
const int FastEncoderHashtableSize = 2048;
const int FastEncoderHashMask = FastEncoderHashtableSize-1;
const int FastEncoderWindowSize = 8192;
const int FastEncoderWindowMask = FastEncoderWindowSize - 1;
const int FastEncoderMatch3DistThreshold = 16384;
internal const int MaxMatch = 258;
internal const int MinMatch = 3;
// Following constants affect the search,
// they should be modifiable if we support different compression levels in future.
const int SearchDepth = 32;
const int GoodLength = 4;
const int NiceLength = 32;
const int LazyMatchThreshold = 6;
// Hashtable structure
private ushort[] prev; // next most recent occurance of chars with same hash value
private ushort[] lookup; // hash table to find most recent occurance of chars with same hash value
public FastEncoderWindow() {
ResetWindow();
}
public int BytesAvailable { // uncompressed bytes
get {
Debug.Assert(bufEnd - bufPos >= 0, "Ending pointer can't be in front of starting pointer!");
return bufEnd - bufPos;
}
}
public DeflateInput UnprocessedInput {
get {
DeflateInput input = new DeflateInput();
input.Buffer = window;
input.StartIndex = bufPos;
input.Count = bufEnd - bufPos;
return input;
}
}
public void FlushWindow() {
ResetWindow();
}
private void ResetWindow() {
window = new byte[2 * FastEncoderWindowSize + MaxMatch + 4];
prev = new ushort[FastEncoderWindowSize + MaxMatch];
lookup = new ushort[FastEncoderHashtableSize];
bufPos = FastEncoderWindowSize;
bufEnd = bufPos;
}
public int FreeWindowSpace { // Free space in the window
get {
return 2 * FastEncoderWindowSize - bufEnd;
}
}
// copy bytes from input buffer into window
public void CopyBytes(byte[] inputBuffer, int startIndex, int count) {
Array.Copy(inputBuffer, startIndex, window, bufEnd, count);
bufEnd += count;
}
// slide the history window to the left by FastEncoderWindowSize bytes
public void MoveWindows() {
int i;
Debug.Assert(bufPos == 2*FastEncoderWindowSize, "only call this at the end of the window");
// verify that the hash table is correct
VerifyHashes(); // Debug only code
Array.Copy(window, bufPos - FastEncoderWindowSize, window, 0, FastEncoderWindowSize);
// move all the hash pointers back
for (i = 0; i < FastEncoderHashtableSize; i++) {
int val = ((int) lookup[i]) - FastEncoderWindowSize;
if (val <= 0) { // too far away now? then set to zero
lookup[i] = (ushort) 0;
} else {
lookup[i] = (ushort) val;
}
}
// prev[]'s are absolute pointers, not relative pointers, so we have to move them back too
// making prev[]'s into relative pointers poses problems of its own
for (i = 0; i < FastEncoderWindowSize; i++) {
long val = ((long) prev[i]) - FastEncoderWindowSize;
if (val <= 0) {
prev[i] = (ushort) 0;
} else {
prev[i] = (ushort) val;
}
}
#if DEBUG
// For debugging, wipe the window clean, so that if there is a bug in our hashing,
// the hash pointers will now point to locations which are not valid for the hash value
// (and will be caught by our ASSERTs).
Array.Clear(window, FastEncoderWindowSize, window.Length - FastEncoderWindowSize);
#endif
VerifyHashes(); // debug: verify hash table is correct
bufPos = FastEncoderWindowSize;
bufEnd = bufPos;
}
private uint HashValue(uint hash, byte b) {
return(hash << FastEncoderHashShift) ^ b;
}
// insert string into hash table and return most recent location of same hash value
private uint InsertString(ref uint hash) {
// Note we only use the lowest 11 bits of the hash vallue (hash table size is 11).
// This enables fast calculation of hash value for the input string.
// If we want to get the next hash code starting at next position,
// we can just increment bufPos and call this function.
hash = HashValue( hash, window[bufPos+2] );
// Need to assert the hash value
uint search = lookup[hash & FastEncoderHashMask];
lookup[hash & FastEncoderHashMask] = (ushort) bufPos;
prev[bufPos & FastEncoderWindowMask] = (ushort) search;
return search;
}
//
// insert strings into hashtable
// Arguments:
// hash : intial hash value
// matchLen : 1 + number of strings we need to insert.
//
private void InsertStrings(ref uint hash, int matchLen) {
Debug.Assert(matchLen > 0, "Invalid match Len!");
if (bufEnd - bufPos <= matchLen) {
bufPos += (matchLen-1);
}
else {
while (--matchLen > 0) {
InsertString(ref hash);
bufPos++;
}
}
}
//
// Find out what we should generate next. It can be a symbol, a distance/length pair
// or a symbol followed by distance/length pair
//
internal bool GetNextSymbolOrMatch(Match match) {
Debug.Assert(bufPos >= FastEncoderWindowSize && bufPos < (2*FastEncoderWindowSize), "Invalid Buffer Position!");
// initialise the value of the hash, no problem if locations bufPos, bufPos+1
// are invalid (not enough data), since we will never insert using that hash value
uint hash = HashValue( 0 , window[bufPos]);
hash = HashValue( hash , window[bufPos + 1]);
int matchLen;
int matchPos = 0;
VerifyHashes(); // Debug only code
if (bufEnd - bufPos <= 3) {
// The hash value becomes corrupt when we get within 3 characters of the end of the
// input window, since the hash value is based on 3 characters. We just stop
// inserting into the hash table at this point, and allow no matches.
matchLen = 0;
}
else {
// insert string into hash table and return most recent location of same hash value
int search = (int)InsertString(ref hash);
// did we find a recent location of this hash value?
if (search != 0) {
// yes, now find a match at what we'll call position X
matchLen = FindMatch(search, out matchPos, SearchDepth, NiceLength);
// truncate match if we're too close to the end of the input window
if (bufPos + matchLen > bufEnd)
matchLen = bufEnd - bufPos;
}
else {
// no most recent location found
matchLen = 0;
}
}
if (matchLen < MinMatch) {
// didn't find a match, so output unmatched char
match.State = MatchState.HasSymbol;
match.Symbol = window[bufPos];
bufPos++;
}
else {
// bufPos now points to X+1
bufPos++;
// is this match so good (long) that we should take it automatically without
// checking X+1 ?
if (matchLen <= LazyMatchThreshold) {
int nextMatchLen;
int nextMatchPos = 0;
// search at position X+1
int search = (int)InsertString(ref hash);
// no, so check for a better match at X+1
if (search != 0) {
nextMatchLen = FindMatch(search, out nextMatchPos,
matchLen < GoodLength ? SearchDepth : (SearchDepth >> 2),NiceLength);
// truncate match if we're too close to the end of the window
// note: nextMatchLen could now be < MinMatch
if (bufPos + nextMatchLen > bufEnd) {
nextMatchLen = bufEnd - bufPos;
}
} else {
nextMatchLen = 0;
}
// right now X and X+1 are both inserted into the search tree
if (nextMatchLen > matchLen) {
// since nextMatchLen > matchLen, it can't be < MinMatch here
// match at X+1 is better, so output unmatched char at X
match.State = MatchState.HasSymbolAndMatch;
match.Symbol = window[bufPos-1];
match.Position = nextMatchPos;
match.Length = nextMatchLen;
// insert remainder of second match into search tree
// example: (*=inserted already)
//
// X X+1 X+2 X+3 X+4
// * *
// nextmatchlen=3
// bufPos
//
// If nextMatchLen == 3, we want to perform 2
// insertions (at X+2 and X+3). However, first we must
// inc bufPos.
//
bufPos++; // now points to X+2
matchLen = nextMatchLen;
InsertStrings(ref hash, matchLen);
} else {
// match at X is better, so take it
match.State = MatchState.HasMatch;
match.Position = matchPos;
match.Length = matchLen;
// Insert remainder of first match into search tree, minus the first
// two locations, which were inserted by the FindMatch() calls.
//
// For example, if matchLen == 3, then we've inserted at X and X+1
// already (and bufPos is now pointing at X+1), and now we need to insert
// only at X+2.
//
matchLen--;
bufPos++; // now bufPos points to X+2
InsertStrings(ref hash, matchLen);
}
} else { // match_length >= good_match
// in assertion: bufPos points to X+1, location X inserted already
// first match is so good that we're not even going to check at X+1
match.State = MatchState.HasMatch;
match.Position = matchPos;
match.Length = matchLen;
// insert remainder of match at X into search tree
InsertStrings(ref hash, matchLen);
}
}
if (bufPos == 2*FastEncoderWindowSize) {
MoveWindows();
}
return true;
}
//
// Find a match starting at specified position and return length of match
// Arguments:
// search : where to start searching
// matchPos : return match position here
// searchDepth : # links to traverse
// NiceLength : stop immediately if we find a match >= NiceLength
//
int FindMatch(int search, out int matchPos, int searchDepth, int niceLength ) {
Debug.Assert(bufPos >= 0 && bufPos < 2*FastEncoderWindowSize, "Invalid Buffer position!");
Debug.Assert(search < bufPos, "Invalid starting search point!");
Debug.Assert(RecalculateHash((int)search) == RecalculateHash(bufPos));
int bestMatch = 0; // best match length found so far
int bestMatchPos = 0; // absolute match position of best match found
// the earliest we can look
int earliest = bufPos - FastEncoderWindowSize;
Debug.Assert(earliest >= 0, "bufPos is less than FastEncoderWindowSize!");
byte wantChar = window[bufPos];
while (search > earliest) {
// make sure all our hash links are valid
Debug.Assert(RecalculateHash((int)search) == RecalculateHash(bufPos), "Corrupted hash link!");
// Start by checking the character that would allow us to increase the match
// length by one. This improves performance quite a bit.
if (window[search + bestMatch] == wantChar) {
int j;
// Now make sure that all the other characters are correct
for (j = 0; j < MaxMatch; j++) {
if (window[bufPos+j] != window[search+j])
break;
}
if (j > bestMatch) {
bestMatch = j;
bestMatchPos = search; // absolute position
if (j > NiceLength) break;
wantChar = window[bufPos+j];
}
}
if (--searchDepth == 0) {
break;
}
Debug.Assert(prev[search & FastEncoderWindowMask] < search, "we should always go backwards!");
search = prev[search & FastEncoderWindowMask];
}
// doesn't necessarily mean we found a match; bestMatch could be > 0 and < MinMatch
matchPos = bufPos - bestMatchPos - 1; // convert absolute to relative position
// don't allow match length 3's which are too far away to be worthwhile
if (bestMatch == 3 && matchPos >= FastEncoderMatch3DistThreshold) {
return 0;
}
Debug.Assert(bestMatch < MinMatch || matchPos < FastEncoderWindowSize, "Only find match inside FastEncoderWindowSize");
return bestMatch;
}
[Conditional("DEBUG")]
void VerifyHashes() {
for (int i = 0; i < FastEncoderHashtableSize; i++) {
ushort where = lookup[i];
ushort nextWhere;
while (where != 0 && bufPos - where < FastEncoderWindowSize) {
Debug.Assert(RecalculateHash(where) == i, "Incorrect Hashcode!");
nextWhere = prev[where & FastEncoderWindowMask];
if (bufPos - nextWhere >= FastEncoderWindowSize) {
break;
}
Debug.Assert(nextWhere < where, "pointer is messed up!");
where = nextWhere;
}
}
}
// can't use conditional attribute here.
uint RecalculateHash(int position) {
return (uint)(((window[position] << (2*FastEncoderHashShift)) ^
(window[position+1] << FastEncoderHashShift) ^
(window[position+2])) & FastEncoderHashMask);
}
}
}
|