1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311
|
/* packer -- a program to pack a list of files into one */
/* The input is a file with one line per file to be packed.
Each line starts with either 'a' or 'b' for ascii or binary.
The second character is a space or tab (it's ignored).
The third character to the end of the line is the file to be packed.
The encoding is as follows:
A file starts with '!' for ascii or '#' for binary in column one.
The remainder of the line is a file name/path.
Ascii files have a special character in column 1:
'A'-'~' indicates 0 to 61 leading blanks (any further blanks are copied
literally). Leading tabs are no longer converted to blanks; a tab is
escaped like any other control character (as $I).
'0'-'9' indicates 0 to 9 blank lines (always followed by an additional newline).
Otherwise, all characters are just copied to output except:
'$' is an escape character:
$$ indicates $,
$@ through $_ indicate 0x00 through 0x1F,
$\n indicates an empty string (useful for avoiding long lines on output)
After $\n, the following character is not treated specially even though
it is in column 1.
Should we be doing more to compress files? It looks like the special
handling of leading blanks compresses files about 4%. This is not much,
but the encoding allows us to put markers (! and #) in column 1 to
separate files. Originally, leading blank handling also converted between
8 and 4 character tab stops, but you can no longer assume tab stops under,
say, Windows, are 8 characters wide. Source files should not have
tabs.
Further simple encoding such as run-length encoding and word
substitution doesn't buy too much and was deemed not worth the effort.
Run-length encoding seems to buy another couple of percent.
Substitution for common words like int, print, return, the, register,
etc. buys maybe .5% per word, but it seems unlikely this will buy
more than a total of 10%, so we're looking at a max of 15% to 20%
compression without starting to huffman encode at the bit level.
For binary files, every 3 bytes are used to form a 24-bit number which is
split into 4 fields of 6 bits. Each field is encoded by adding ascii '0'.
If only one or two bytes are left at the end of the file, the encoding is
as if zeros were appended to the file, but only 2 or 3 ascii characters
(instead of the usual 4) are output. The encoding of a binary file is terminated
with a period ('.'). Newlines are inserted to keep line lengths down but
should be ignored by the reader.
*/
#include "switches.h"
#include "stdlib.h"
#include "string.h"
#include "cext.h"
#include "convert.h"
/* since we aren't using the cleanup package, expose exit(): */
#undef exit
#include "stdio.h"
#ifdef MACINTOSH
#include "console.h"
#endif
/* EOS -- end-of-string marker (the NUL terminator) */
#define EOS 0
/* string_max -- size of the buffers that hold one line of the input list */
#define string_max 500
/* Forward declarations of the functions defined after main.  The parameter
   lists are left empty (old-style declarations), so argument types are not
   checked at the call sites. */
void pack_newline();
void pack_ascii();
void pack_binary();
void put_binary();
/* main -- pack a list of files
 *
 * Usage: packer [-p] input-list-file output-file
 *
 * Each non-blank line of input-list-file must be 'a' or 'b', one separator
 * character, and then the name of the file to pack; the file is appended to
 * output-file by pack_ascii or pack_binary respectively.  Malformed lines
 * are reported on stderr and skipped.
 *
 * Exits with status 1 on a usage error, when either file cannot be opened,
 * or when closing the output file reports an error; otherwise returns 0.
 */
int main(argc, argv)
  int argc;
  char *argv[];
{
    FILE *inf;    /* input file: a list of file names to pack */
    FILE *outf;   /* the packed output */
    char filename[string_max]; /* holds names of input files */
    char convname[string_max]; /* filename converted to local syntax */
    int base = 1; /* index in argv of input-list-file */

#ifdef MACINTOSH
    argc = ccommand(&argv);
#endif
    if (argc != 3 && argc != 4) {
        fprintf(stderr, "Usage: packer [-p] input-list-file output-file\n");
        exit(1);
    }
    if (argc == 4) {
        base = 2;
        if (strcmp(argv[1], "-p") == 0) {
            pauseflag = 1;
        } else {
            fprintf(stderr, "Expected \"-p\" as 1st argument.\n");
            exit(1);
        }
    }
    inf = fopen(argv[base], "r");
    if (!inf) {
        fprintf(stderr, "Couldn't open |%s|\n", argv[base]);
        exit(1);
    }
    outf = fopen(argv[base + 1], "w");
    if (!outf) {
        fclose(inf);
        fprintf(stderr, "Couldn't open |%s|\n", argv[base + 1]);
        exit(1);
    }
    printf("Using tab width of %d\n", TAB_WIDTH);
    while (fgets(filename, string_max, inf)) {
        /* Remove the newline only if one is present: the last line of the
           list may lack it, and chopping the final character blindly would
           corrupt that file name. */
        filename[strcspn(filename, "\n")] = EOS;
        if (filename[0] == EOS) continue; /* skip blank lines */
        puts(filename);
        if (filename[0] != 'a' && filename[0] != 'b') {
            fprintf(stderr, "Bad file spec (expecting a or b in col 1): %s\n",
                    filename);
            if (PAUSE) getchar();
            continue;
        }
        /* The name starts at the third character.  On a shorter line,
           filename + 2 would point past the terminator into stale data. */
        if (filename[1] == EOS || filename[2] == EOS) {
            fprintf(stderr, "Bad file spec (missing file name): %s\n",
                    filename);
            if (PAUSE) getchar();
            continue;
        }
        strcpy(convname, filename + 2);
        convert(convname);
        if (filename[0] == 'a') {
            pack_ascii(filename + 2, convname, outf);
        } else {
            pack_binary(filename + 2, convname, outf);
        }
    }
    fclose(inf);
    /* fclose flushes buffered output, so a failure here means the packed
       file is incomplete. */
    if (fclose(outf) != 0) {
        fprintf(stderr, "Error writing |%s|\n", argv[base + 1]);
        exit(1);
    }
    return 0;
}
/* pack_ascii -- open filename and append its encoding to outf
 *
 * filename is the name written on the '!' header line; convname is the name
 * actually passed to fopen.  If the file cannot be opened, a message is
 * printed and nothing is written to outf.  A character above 127 stops the
 * whole program with exit status 1.  If the input does not end with a
 * newline, one is added to the output and a warning is printed.
 */
void pack_ascii(filename, convname, outf)
  char *filename;
  char *convname;
  FILE *outf;
{
    int line_len = 0;   /* characters written on the current output line */
    int unterminated;   /* current source line has content but no newline yet */
    int c;
    FILE *inf;

    inf = fopen(convname, "r");
    if (!inf) {
        fprintf(stderr, "Couldn't open |%s| - skipped\n", convname);
        if (PAUSE) getchar();
        return;
    }
    fprintf(outf, "!%s\n", filename);
    pack_newline(inf, outf, &line_len);
    /* pack_newline advances line_len only when something follows the
       newlines, i.e. when a new source line has been started. */
    unterminated = (line_len != 0);
    while ((c = getc(inf)) != EOF) {
        if (c > 127) {
            fprintf(stderr, "non-ascii char 0x%x in %s.\n", c, convname);
            exit(1);
        } else if (c == '\n') {
            putc(c, outf);
            line_len = 0;
            pack_newline(inf, outf, &line_len);
            unterminated = (line_len != 0);
        } else if (c == '$') {
            /* the escape character itself is written as $$ */
            putc('$', outf);
            putc('$', outf);
            line_len += 2;
        } else if (c < 32) {
            /* control characters 0x00-0x1F are written as $@ through $_ */
            putc('$', outf);
            putc('@' + c, outf);
            line_len += 2;
        } else {
            putc(c, outf);
            line_len++;
        }
        if (line_len > 70) {
            /* soft line break: "$\n" stands for the empty string */
            putc('$', outf);
            putc('\n', outf);
            line_len = 0;
        }
    }
    /* Test the source-line state rather than line_len: a soft break just
       before EOF resets line_len to 0, and without a real newline the next
       file's marker would directly follow "$\n" instead of starting a
       fresh line. */
    if (unterminated) {
        fprintf(stderr, "missing newline added to the end of %s\n", convname);
        putc('\n', outf);
        if (PAUSE) getchar();
    }
    fclose(inf);
}
/* pack_binary -- open binary filename and append its encoding to outf
 *
 * filename is the name written on the '#' header line; convname is the name
 * actually passed to fopen.  Every 3 input bytes become 4 output characters
 * (see put_binary); a trailing group of 1 or 2 bytes is padded with zero
 * bytes and written as only 2 or 3 characters.  The data is terminated by
 * '.' and a newline.  A warning is printed when no byte above 127 was seen.
 */
void pack_binary(filename, convname, outf)
  char *filename;
  char *convname;
  FILE *outf;
{
    FILE *inf;
    long group = 0;     /* bytes collected so far, earliest byte highest */
    int pending = 0;    /* number of bytes currently held in group */
    int column = 0;     /* characters written on the current output line */
    int byte;
    boolean isbinary = false;

    inf = fopen(convname, "rb");
    if (!inf) {
        fprintf(stderr, "Couldn't open |%s| - skipped\n", convname);
        if (PAUSE) getchar();
        return;
    }
    fprintf(outf, "#%s\n", filename);

    for (;;) {
        byte = getc(inf);
        if (byte == EOF) break;
        if (byte > 127) isbinary = true;
        group = (group << 8) | byte;
        if (++pending < 3) continue;

        /* a full group of three bytes is ready */
        put_binary(group, outf);
        group = 0;
        pending = 0;
        column += 4;
        if (column >= 70) {
            putc('\n', outf);
            column = 0;
        }
    }

    if (pending > 0) {
        /* pad the partial group with zero bytes up to 24 bits */
        group <<= 8 * (3 - pending);
        putc('0' + ((group >> 18) & 0x3F), outf);
        putc('0' + ((group >> 12) & 0x3F), outf);
        if (pending == 2) {
            putc('0' + ((group >> 6) & 0x3F), outf);
        }
    }
    putc('.', outf);
    putc('\n', outf);

    if (!isbinary) {
        fprintf(stderr, "%s seems to be an ascii file.\n", convname);
        if (PAUSE) getchar();
    }
    fclose(inf);
}
/* pack_newline -- newline sequence encoding to outf
 *
 * Called at the start of a source line.  Consumes any run of newlines and
 * writes it as digit lines (a digit d followed by a newline stands for d+1
 * newlines), then consumes the leading blanks of the next line and writes
 * them as one character 'A' + count, capped at '~' with any excess blanks
 * copied literally.  The first character that is neither newline nor blank
 * is pushed back onto inf.  *line_len is reset to 0 after each digit line
 * and advanced by the number of characters written for the blanks.
 */
void pack_newline(inf, outf, line_len)
  FILE *inf;  /* input file */
  FILE *outf; /* where to write output */
  int *line_len;
{
    int ch;
    int blank_lines = 0;
    int blanks = 0;
    int packed;     /* how many blanks the count character accounts for */

    /* count the newlines */
    ch = getc(inf);
    while (ch == '\n') {
        blank_lines++;
        ch = getc(inf);
    }

    /* full groups of ten first, then the remainder */
    for (; blank_lines >= 10; blank_lines -= 10) {
        fputs("9\n", outf);
        *line_len = 0;
    }
    if (blank_lines > 0) {
        putc('0' + blank_lines - 1, outf);
        putc('\n', outf);
        *line_len = 0;
    }

    /* run-length encode leading blanks (tabs are not converted) */
    for (; ch == ' '; ch = getc(inf)) {
        blanks++;
    }

    /* end of input with nothing pending: no count character is written */
    if (ch == EOF && blanks == 0) return;

    packed = (blanks > '~' - 'A') ? '~' - 'A' : blanks;
    putc('A' + packed, outf);
    *line_len += 1 + (blanks - packed);
    for (blanks -= packed; blanks > 0; blanks--) {
        putc(' ', outf);
    }

    /* leave the rest of the line for the caller */
    if (ch != EOF) ungetc(ch, inf);
}
/* put_binary -- write 3 binary bytes as 4 ascii bytes
 *
 * data holds the three bytes as one 24-bit value.  It is split into four
 * 6-bit fields, most significant first, and each field is written to outf
 * as the character '0' + field.
 */
void put_binary(data, outf)
  long data;
  FILE *outf;
{
    int shift;

    for (shift = 18; shift >= 0; shift -= 6) {
        putc('0' + (int) ((data >> shift) & 0x3F), outf);
    }
}
|