1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
|
#ifndef LDM_H
#define LDM_H
#include "mem.h" // from /lib/common/mem.h
//#include "ldm_params.h"
// =============================================================================
// Modify the parameters in ldm_params.h if "ldm_params.h" is included.
// Otherwise, modify the parameters here.
// =============================================================================
#ifndef LDM_PARAMS_H
// Defines the size of the hash table.
// Note that this is not the number of buckets.
// Currently this should be less than WINDOW_SIZE_LOG + 4.
#define LDM_MEMORY_USAGE 23
// The number of entries in a hash bucket.
#define HASH_BUCKET_SIZE_LOG 3 // The maximum is 4 for now.
// Defines the lag in inserting elements into the hash table.
#define LDM_LAG 0
// The maximum window size when searching for matches.
// The maximum value is 30
#define LDM_WINDOW_SIZE_LOG 28
// The minimum match length.
// This should be a multiple of four.
#define LDM_MIN_MATCH_LENGTH 64
// If INSERT_BY_TAG, insert entries into the hash table as a function of the
// hash. Certain hashes will not be inserted.
//
// Otherwise, insert as a function of the position.
#define INSERT_BY_TAG 1
// Store a checksum with the hash table entries for faster comparison.
// This halves the number of entries the hash table can contain.
#define USE_CHECKSUM 1
#endif
// Output compression statistics.
#define COMPUTE_STATS
// Output the configuration.
#define OUTPUT_CONFIGURATION
// If defined, forces the probability of insertion to be approximately
// one per (1 << HASH_ONLY_EVERY_LOG). If not defined, the probability will be
// calculated based on the memory usage and window size for "even" insertion
// throughout the window.
// #define HASH_ONLY_EVERY_LOG 8
// =============================================================================
// The number of bytes storing the compressed and decompressed size
// in the header.
#define LDM_COMPRESSED_SIZE 8
#define LDM_DECOMPRESSED_SIZE 8
#define LDM_HEADER_SIZE ((LDM_COMPRESSED_SIZE)+(LDM_DECOMPRESSED_SIZE))
#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
// The number of bytes storing the offset.
#define LDM_OFFSET_SIZE 4
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
// TODO: Match lengths that are too small do not use the hash table efficiently.
// There should be a minimum hash length given the hash table size.
#define LDM_HASH_LENGTH LDM_MIN_MATCH_LENGTH
typedef struct LDM_compressStats LDM_compressStats;
typedef struct LDM_CCtx LDM_CCtx;
typedef struct LDM_DCtx LDM_DCtx;
/**
* Compresses src into dst.
* Returns the compressed size if successful, 0 otherwise.
*
* NB: This currently ignores maxDstSize and assumes enough space is available.
*
* Block format (see lz4 documentation for more information):
* github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
*
* A block is composed of sequences. Each sequence begins with a token, which
* is a one-byte value separated into two 4-bit fields.
*
* The first field uses the four high bits of the token and encodes the literal
* length. If the field value is 0, there is no literal. If it is 15,
* additional bytes are added (each ranging from 0 to 255) to the previous
* value to produce a total length.
*
* Following the token and optional length bytes are the literals.
*
* Next are the 4 bytes representing the offset of the match (2 in lz4),
* representing the position to copy the literals.
*
* The lower four bits of the token encode the match length. With additional
* bytes added similarly to the additional literal length bytes after the offset.
*
* The last sequence is incomplete and stops right after the literals.
*/
size_t LDM_compress(const void *src, size_t srcSize,
void *dst, size_t maxDstSize);
/**
* Initialize the compression context.
*
* Allocates memory for the hash table.
*
* Returns 0 if successful, 1 otherwise.
*/
size_t LDM_initializeCCtx(LDM_CCtx *cctx,
const void *src, size_t srcSize,
void *dst, size_t maxDstSize);
/**
* Frees up memory allocated in LDM_initializeCCtx().
*/
void LDM_destroyCCtx(LDM_CCtx *cctx);
/**
* Prints the distribution of offsets in the hash table.
*
* The offsets are defined as the distance of the hash table entry from the
* current input position of the cctx.
*/
void LDM_outputHashTableOffsetHistogram(const LDM_CCtx *cctx);
/**
* Outputs compression statistics to stdout.
*/
void LDM_printCompressStats(const LDM_compressStats *stats);
/**
* Encode the literal length followed by the literals.
*
* The literal length is written to the upper four bits of pToken, with
* additional bytes written to the output as needed (see lz4).
*
* This is followed by literalLength bytes corresponding to the literals.
*/
void LDM_encodeLiteralLengthAndLiterals(LDM_CCtx *cctx, BYTE *pToken,
const U64 literalLength);
/**
* Write current block (literals, literal length, match offset,
* match length).
*/
void LDM_outputBlock(LDM_CCtx *cctx,
const U64 literalLength,
const U32 offset,
const U64 matchLength);
/**
* Decompresses src into dst.
*
* Note: assumes src does not have a header.
*/
size_t LDM_decompress(const void *src, size_t srcSize,
void *dst, size_t maxDstSize);
/**
* Initialize the decompression context.
*/
void LDM_initializeDCtx(LDM_DCtx *dctx,
const void *src, size_t compressedSize,
void *dst, size_t maxDecompressedSize);
/**
* Reads the header from src and writes the compressed size and
* decompressed size into compressedSize and decompressedSize respectively.
*
* NB: LDM_compress and LDM_decompress currently do not add/read headers.
*/
void LDM_readHeader(const void *src, U64 *compressedSize,
U64 *decompressedSize);
/**
* Write the compressed and decompressed size.
*/
void LDM_writeHeader(void *memPtr, U64 compressedSize,
U64 decompressedSize);
/**
* Output the configuration used.
*/
void LDM_outputConfiguration(void);
#endif /* LDM_H */
|