1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
/*
* Copyright (c) Yann Collet, Meta Platforms, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "external_matchfinder.h"
#include <string.h>
#include "zstd_compress_internal.h"
/* Number of entries in the match finder's hash table.
 * NOTE(review): presumably must stay equal to (1 << HLOG) so that every value
 * returned by ZSTD_hashPtr(..., HLOG, ...) is a valid table index - confirm. */
#define HSIZE 1024
/* Hash width handed to ZSTD_hashPtr() when computing a table index. */
static U32 const HLOG = 10;
/* Length argument handed to ZSTD_hashPtr(); the search loop also stops once
 * no more than this many bytes remain before the end of the input. */
static U32 const MLS = 4;
/* Sentinel stored in hash table slots that do not hold a position yet. */
static U32 const BADIDX = 0xffffffff;
/* simpleSequenceProducer() :
 * Minimal single-probe hash-table match finder.
 *
 * Walks `src` from start to end. At each position it hashes the bytes at `ip`,
 * looks up the previous position stored for that hash, and records the current
 * position in its place. When the candidate yields a match of at least
 * ZSTD_MINMATCH_MIN bytes whose offset does not exceed `windowSize`, one
 * sequence (offset, pending literals, match length) is emitted and the scan
 * resumes right after the match. A final literals-only sequence
 * (offset == 0, matchLength == 0) covering the remaining bytes is always
 * appended.
 *
 * @param outSeqs          output buffer receiving the sequences
 * @param outSeqsCapacity  number of ZSTD_Sequence slots available in outSeqs
 * @param src, srcSize     input to scan
 * @param windowSize       largest match offset allowed
 * sequenceProducerState, dict, dictSize and compressionLevel are unused.
 *
 * @return number of sequences written into outSeqs, or
 *         ZSTD_SEQUENCE_PRODUCER_ERROR if outSeqs is too small to hold them.
 */
static size_t simpleSequenceProducer(
  void* sequenceProducerState,
  ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
  const void* src, size_t srcSize,
  const void* dict, size_t dictSize,
  int compressionLevel,
  size_t windowSize
) {
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* const iend = istart + srcSize;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;   /* start of the literals not yet emitted */
    size_t seqCount = 0;
    U32 hashTable[HSIZE];

    (void)sequenceProducerState;
    (void)dict;
    (void)dictSize;
    (void)compressionLevel;

    /* Every slot starts out empty. */
    {   int i;
        for (i=0; i < HSIZE; i++) {
            hashTable[i] = BADIDX;
    }   }

    /* Compare the remaining byte count instead of computing `ip + MLS`:
     * for inputs shorter than MLS that pointer would lie more than one past
     * the end of the buffer, which is undefined behavior.
     * `ip` never moves past `iend`, so the difference is never negative. */
    while ((size_t)(iend - ip) > MLS) {
        size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS);
        U32 const matchIndex = hashTable[hash];
        /* Always remember the current position, whether or not a match is used. */
        hashTable[hash] = (U32)(ip - istart);

        if (matchIndex != BADIDX) {
            const BYTE* const match = istart + matchIndex;
            U32 const matchLen = (U32)ZSTD_count(ip, match, iend);
            if (matchLen >= ZSTD_MINMATCH_MIN) {
                U32 const litLen = (U32)(ip - anchor);
                U32 const offset = (U32)(ip - match);
                ZSTD_Sequence const seq = {
                    offset, litLen, matchLen, 0
                };

                /* Note: it's crucial to stay within the window size! */
                if (offset <= windowSize) {
                    /* Never write past the caller-provided buffer. */
                    if (seqCount >= outSeqsCapacity) {
                        return ZSTD_SEQUENCE_PRODUCER_ERROR;
                    }
                    outSeqs[seqCount++] = seq;
                    ip += matchLen;
                    anchor = ip;
                    continue;
                }
            }
        }

        ip++;
    }

    /* Trailing literals-only sequence covering everything after the last match. */
    {   ZSTD_Sequence const finalSeq = {
            0, (U32)(iend - anchor), 0, 0
        };
        if (seqCount >= outSeqsCapacity) {
            return ZSTD_SEQUENCE_PRODUCER_ERROR;
        }
        outSeqs[seqCount++] = finalSeq;
    }

    return seqCount;
}
/* zstreamSequenceProducer() :
 * Test sequence producer whose output is selected by the EMF_testCase value
 * that `sequenceProducerState` points to.
 *
 * The whole output buffer is zeroed first, so any field a case does not set
 * explicitly (notably `rep`) reads as 0.
 *
 * @param sequenceProducerState  pointer to an EMF_testCase selecting the behavior
 * @param outSeqs                output buffer receiving the sequences
 * @param outSeqsCapacity        number of ZSTD_Sequence slots available in outSeqs
 * @param src, srcSize           input block
 * The remaining parameters are only forwarded to simpleSequenceProducer().
 *
 * @return number of sequences written, or, for the error test cases, a value
 *         larger than outSeqsCapacity / ZSTD_SEQUENCE_PRODUCER_ERROR.
 */
size_t zstreamSequenceProducer(
  void* sequenceProducerState,
  ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
  const void* src, size_t srcSize,
  const void* dict, size_t dictSize,
  int compressionLevel,
  size_t windowSize
) {
    EMF_testCase const testCase = *((EMF_testCase*)sequenceProducerState);
    /* outSeqsCapacity counts elements, not bytes: scale by the element size so
     * the entire buffer is cleared rather than only its first few bytes. */
    memset(outSeqs, 0, outSeqsCapacity * sizeof(*outSeqs));

    switch (testCase) {
        case EMF_ZERO_SEQS:
            /* Report no sequences at all. */
            return 0;
        case EMF_ONE_BIG_SEQ:
            /* A single literals-only sequence spanning the whole input. */
            outSeqs[0].offset = 0;
            outSeqs[0].matchLength = 0;
            outSeqs[0].litLength = (U32)(srcSize);
            return 1;
        case EMF_LOTS_OF_SEQS:
            /* Delegate to the hash-table match finder. */
            return simpleSequenceProducer(
                sequenceProducerState,
                outSeqs, outSeqsCapacity,
                src, srcSize,
                dict, dictSize,
                compressionLevel,
                windowSize
            );
        case EMF_INVALID_OFFSET:
            /* Lengths add up to srcSize, but the offset is fixed at 1 << 20.
             * NOTE(review): presumably chosen to exceed the valid offset range
             * for the inputs used by the tests - confirm against callers. */
            outSeqs[0].offset = 1 << 20;
            outSeqs[0].matchLength = 4;
            outSeqs[0].litLength = (U32)(srcSize - 4);
            return 1;
        case EMF_INVALID_MATCHLEN:
            /* litLength + matchLength == srcSize + 1: one byte too many. */
            outSeqs[0].offset = 1;
            outSeqs[0].matchLength = (U32)(srcSize);
            outSeqs[0].litLength = 1;
            return 1;
        case EMF_INVALID_LITLEN:
            /* Literals-only sequence one byte longer than the input. */
            outSeqs[0].offset = 0;
            outSeqs[0].matchLength = 0;
            outSeqs[0].litLength = (U32)(srcSize + 1);
            return 1;
        case EMF_INVALID_LAST_LITS:
            /* First sequence covers 2 bytes, the trailing literals cover
             * srcSize - 1: the total is srcSize + 1. */
            outSeqs[0].offset = 1;
            outSeqs[0].matchLength = 1;
            outSeqs[0].litLength = 1;
            outSeqs[1].offset = 0;
            outSeqs[1].matchLength = 0;
            outSeqs[1].litLength = (U32)(srcSize - 1);
            return 2;
        case EMF_SMALL_ERROR:
            /* Sequence count just beyond the buffer capacity. */
            return outSeqsCapacity + 1;
        case EMF_BIG_ERROR:
        default:
            return ZSTD_SEQUENCE_PRODUCER_ERROR;
    }
}
|