1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
|
/*
* jchuff.c
*
* Copyright (C) 1991-1998, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains Huffman entropy decoding routines which are shared
* by the sequential, progressive and lossless decoders.
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jchuff.h" /* Declarations shared with jc*huff.c */
/*
* Compute the derived values for a Huffman table.
* This routine also performs some validation checks on the table.
*/
GLOBAL(void)
jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
c_derived_tbl ** pdtbl)
{
JHUFF_TBL *htbl;
c_derived_tbl *dtbl;
int p, i, l, lastp, si, maxsymbol;
char huffsize[257];
unsigned int huffcode[257];
unsigned int code;
/* Note that huffsize[] and huffcode[] are filled in code-length order,
* paralleling the order of the symbols themselves in htbl->huffval[].
*/
/* Find the input Huffman table */
if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
htbl =
isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
if (htbl == NULL)
ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
/* Allocate a workspace if we haven't already done so. */
if (*pdtbl == NULL)
*pdtbl = (c_derived_tbl *)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
SIZEOF(c_derived_tbl));
dtbl = *pdtbl;
/* Figure C.1: make table of Huffman code length for each symbol */
p = 0;
for (l = 1; l <= 16; l++) {
i = (int) htbl->bits[l];
if (i < 0 || p + i > 256) /* protect against table overrun */
ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
while (i--)
huffsize[p++] = (char) l;
}
huffsize[p] = 0;
lastp = p;
/* Figure C.2: generate the codes themselves */
/* We also validate that the counts represent a legal Huffman code tree. */
code = 0;
si = huffsize[0];
p = 0;
while (huffsize[p]) {
while (((int) huffsize[p]) == si) {
huffcode[p++] = code;
code++;
}
/* code is now 1 more than the last code used for codelength si; but
* it must still fit in si bits, since no code is allowed to be all ones.
* BUG FIX 2001-09-03: Comparison must be >, not >=
*/
if (((INT32) code) > (((INT32) 1) << si))
ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
code <<= 1;
si++;
}
/* Figure C.3: generate encoding tables */
/* These are code and size indexed by symbol value */
/* Set all codeless symbols to have code length 0;
* this lets us detect duplicate VAL entries here, and later
* allows emit_bits to detect any attempt to emit such symbols.
*/
MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
/* This is also a convenient place to check for out-of-range
* and duplicated VAL entries. We allow 0..255 for AC symbols
* but only 0..16 for DC. (We could constrain them further
* based on data depth and mode, but this seems enough.)
*/
maxsymbol = isDC ? 16 : 255;
for (p = 0; p < lastp; p++) {
i = htbl->huffval[p];
if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
dtbl->ehufco[i] = huffcode[p];
dtbl->ehufsi[i] = huffsize[p];
}
}
/*
* Generate the best Huffman code table for the given counts, fill htbl.
*
* The JPEG standard requires that no symbol be assigned a codeword of all
* one bits (so that padding bits added at the end of a compressed segment
* can't look like a valid code). Because of the canonical ordering of
* codewords, this just means that there must be an unused slot in the
* longest codeword length category. Section K.2 of the JPEG spec suggests
* reserving such a slot by pretending that symbol 256 is a valid symbol
* with count 1. In theory that's not optimal; giving it count zero but
* including it in the symbol set anyway should give a better Huffman code.
* But the theoretically better code actually seems to come out worse in
* practice, because it produces more all-ones bytes (which incur stuffed
* zero bytes in the final file). In any case the difference is tiny.
*
* The JPEG standard requires Huffman codes to be no more than 16 bits long.
* If some symbols have a very small but nonzero probability, the Huffman tree
* must be adjusted to meet the code length restriction. We currently use
* the adjustment method suggested in JPEG section K.2. This method is *not*
* optimal; it may not choose the best possible limited-length code. But
* typically only very-low-frequency symbols will be given less-than-optimal
* lengths, so the code is almost optimal. Experimental comparisons against
* an optimal limited-length-code algorithm indicate that the difference is
* microscopic --- usually less than a hundredth of a percent of total size.
* So the extra complexity of an optimal algorithm doesn't seem worthwhile.
*/
GLOBAL(void)
jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
{
#define MAX_CLEN 32 /* assumed maximum initial code length */
UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */
int codesize[257]; /* codesize[k] = code length of symbol k */
int others[257]; /* next symbol in current branch of tree */
int c1, c2;
int p, i, j;
long v;
/* This algorithm is explained in section K.2 of the JPEG standard */
MEMZERO(bits, SIZEOF(bits));
MEMZERO(codesize, SIZEOF(codesize));
for (i = 0; i < 257; i++)
others[i] = -1; /* init links to empty */
freq[256] = 1; /* make sure 256 has a nonzero count */
/* Including the pseudo-symbol 256 in the Huffman procedure guarantees
* that no real symbol is given code-value of all ones, because 256
* will be placed last in the largest codeword category.
*/
/* Huffman's basic algorithm to assign optimal code lengths to symbols */
for (;;) {
/* Find the smallest nonzero frequency, set c1 = its symbol */
/* In case of ties, take the larger symbol number */
c1 = -1;
v = 1000000000L;
for (i = 0; i <= 256; i++) {
if (freq[i] && freq[i] <= v) {
v = freq[i];
c1 = i;
}
}
/* Find the next smallest nonzero frequency, set c2 = its symbol */
/* In case of ties, take the larger symbol number */
c2 = -1;
v = 1000000000L;
for (i = 0; i <= 256; i++) {
if (freq[i] && freq[i] <= v && i != c1) {
v = freq[i];
c2 = i;
}
}
/* Done if we've merged everything into one frequency */
if (c2 < 0)
break;
/* Else merge the two counts/trees */
freq[c1] += freq[c2];
freq[c2] = 0;
/* Increment the codesize of everything in c1's tree branch */
codesize[c1]++;
while (others[c1] >= 0) {
c1 = others[c1];
codesize[c1]++;
}
others[c1] = c2; /* chain c2 onto c1's tree branch */
/* Increment the codesize of everything in c2's tree branch */
codesize[c2]++;
while (others[c2] >= 0) {
c2 = others[c2];
codesize[c2]++;
}
}
/* Now count the number of symbols of each code length */
for (i = 0; i <= 256; i++) {
if (codesize[i]) {
/* The JPEG standard seems to think that this can't happen, */
/* but I'm paranoid... */
if (codesize[i] > MAX_CLEN)
ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
bits[codesize[i]]++;
}
}
/* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
* Huffman procedure assigned any such lengths, we must adjust the coding.
* Here is what the JPEG spec says about how this next bit works:
* Since symbols are paired for the longest Huffman code, the symbols are
* removed from this length category two at a time. The prefix for the pair
* (which is one bit shorter) is allocated to one of the pair; then,
* skipping the BITS entry for that prefix length, a code word from the next
* shortest nonzero BITS entry is converted into a prefix for two code words
* one bit longer.
*/
for (i = MAX_CLEN; i > 16; i--) {
while (bits[i] > 0) {
j = i - 2; /* find length of new prefix to be used */
while (bits[j] == 0)
j--;
bits[i] -= 2; /* remove two symbols */
bits[i-1]++; /* one goes in this length */
bits[j+1] += 2; /* two new symbols in this length */
bits[j]--; /* symbol of this length is now a prefix */
}
}
/* Remove the count for the pseudo-symbol 256 from the largest codelength */
while (bits[i] == 0) /* find largest codelength still in use */
i--;
bits[i]--;
/* Return final symbol counts (only for lengths 0..16) */
MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
/* Return a list of the symbols sorted by code length */
/* It's not real clear to me why we don't need to consider the codelength
* changes made above, but the JPEG spec seems to think this works.
*/
p = 0;
for (i = 1; i <= MAX_CLEN; i++) {
for (j = 0; j <= 255; j++) {
if (codesize[j] == i) {
htbl->huffval[p] = (UINT8) j;
p++;
}
}
}
/* Set sent_table FALSE so updated table will be written to JPEG file. */
htbl->sent_table = FALSE;
}
|