File: libulzw_decomp.c

package info (click to toggle)
librnd 4.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 12,812 kB
  • sloc: ansic: 126,990; sh: 2,602; makefile: 2,145; awk: 7
file content (142 lines) | stat: -rw-r--r-- 5,819 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/*//////////////////////////////////////////////////////////////////////////
//                            **** LIBULZW ****                           //
//               Adjusted Binary LZW Compressor/Decompressor              //
//                     Copyright (c) 2016 David Bryant                    //
//                           All Rights Reserved                          //
//      Distributed under the BSD Software License (see license.txt)      //
//////////////////////////////////////////////////////////////////////////*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef ULZW_CODES_DEFINED
#define ULZW_CODES_DEFINED 1
#define ULZW_NULL_CODE       -1  /* indicates a NULL prefix */
#define ULZW_CLEAR_CODE      256 /* code to flush dictionary and restart decoder */
#define ULZW_FIRST_STRING    257 /* code of first dictionary string */
#endif

/* LZW decompression function. Bytes (8-bit) are read and written through callbacks. The
 * "maxbits" parameter is read as the first byte in the stream and controls how much memory
 * is allocated for decoding. A return value of EOF from the "src" callback terminates the
 * compression process (although this should not normally occur). A non-zero return value
 * indicates an error, which in this case can be a bad "maxbits" read from the stream, a
 * failed malloc(), or if an EOF is read from the input stream before the compression
 * terminates naturally with END_CODE.
 */

int ulzw_decompress(void *ctx, void (*dst)(void *, int), int(*src)(void *))
{
	int read_byte, next = ULZW_FIRST_STRING, prefix = ULZW_CLEAR_CODE, bits = 0, total_codes;
	unsigned char *terminators, *reverse_buffer;
	unsigned long shifter = 0;
	short *prefixes;

	if ((read_byte = ((*src) (ctx))) == EOF || (read_byte & 0xfc)) /*sanitize first byte */
		return 1;

	/* based on the "maxbits" parameter, compute total codes and allocate dictionary storage */

	total_codes = 512 << (read_byte & 0x3);
	reverse_buffer = malloc((total_codes - 256) * sizeof(reverse_buffer[0]));
	prefixes = malloc((total_codes - 256) * sizeof(prefixes[0]));
	terminators = malloc((total_codes - 256) * sizeof(terminators[0]));

	if (!reverse_buffer || !prefixes || !terminators) /* check for mallco() failure */
		return 1;

	/* This is the main loop where we read input symbols. The values range from 0 to the code value
	   of the "next" string in the dictionary (although the actual "next" code cannot be used yet,
	   and so we reserve that code for the END_CODE). Note that receiving an EOF from the input
	   stream is actually an error because we should have gotten the END_CODE first. */

	while(1) {
		int code_bits = next < 1024 ? (next < 512 ? 8 : 9) : (next < 2048 ? 10 : 11), code;
		int extras = (1 << (code_bits + 1)) - next - 1;

		do {
			if ((read_byte = ((*src) (ctx))) == EOF) {
				free(terminators);
				free(prefixes);
				free(reverse_buffer);
				return 1;
			}

			shifter |= (long)read_byte << bits;
		} while((bits += 8) < code_bits);

		/* first we assume the code will fit in the minimum number of required bits */

		code = (int)shifter & ((1 << code_bits) - 1);
		shifter >>= code_bits;
		bits -= code_bits;

		/* but if code >= extras, then we need to read another bit to calculate the real code
		   (this is the "adjusted binary" part) */

		if (code >= extras) {
			if (!bits) {
				if ((read_byte = ((*src) (ctx))) == EOF) {
					free(terminators);
					free(prefixes);
					free(reverse_buffer);
					return 1;
				}

				shifter = (long)read_byte;
				bits = 8;
			}

			code = (code << 1) - extras + (shifter & 1);
			shifter >>= 1;
			bits--;
		}

		if (code == next) /* sending the maximum code is reserved for the end of the file */
			break;
		else if (code == ULZW_CLEAR_CODE) /* otherwise check for a ULZW_CLEAR_CODE to start over early */
			next = ULZW_FIRST_STRING;
		else if (prefix == ULZW_CLEAR_CODE) { /* this only happens at the first symbol which is always sent */
			(*dst) (ctx, code); /* literally and becomes our initial prefix */
			next++;
		}
		/* Otherwise we have a valid prefix so we step through the string from end to beginning storing the
		   bytes in the "reverse_buffer", and then we send them out in the proper order. One corner-case
		   we have to handle here is that the string might be the same one that is actually being defined
		   now (code == next-1). Also, the first 256 entries of "terminators" and "prefixes" are fixed and
		   not allocated, so that messes things up a bit. */
		else {
			int cti = (code == next - 1) ? prefix : code;
			unsigned char *rbp = reverse_buffer, c;

			do
				*rbp++ = cti < 256 ? cti : terminators[cti - 256]; /* step backward through string... */
			while((cti = (cti < 256) ? ULZW_NULL_CODE : prefixes[cti - 256]) != ULZW_NULL_CODE);

			c = *--rbp; /* the first byte in this string is the terminator for the last string, which is */
			/* the one that we'll create a new dictionary entry for this time */

			do
				(*dst) (ctx, *rbp); /* send string in corrected order (except for the terminator */
			while(rbp-- != reverse_buffer); /* which we don't know yet) */

			if (code == next - 1)
				(*dst) (ctx, c);

			prefixes[next - 1 - 256] = prefix; /* now update the next dictionary entry with the new string */
			terminators[next - 1 - 256] = c; /* (but we're always one behind, so it's not the string just sent) */

			if (++next == total_codes) /* check for full dictionary, which forces a reset (and, BTW, */
				next = ULZW_FIRST_STRING; /* means we'll never use the dictionary entry we just wrote) */
		}

		prefix = code; /* the code we just received becomes the prefix for the next dictionary string entry */
		/* (which we'll create once we find out the terminator) */
	}

	free(terminators);
	free(prefixes);
	free(reverse_buffer);
	return 0;
}