1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
|
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Stream that receives the entries dropped from the normal output.
 * main() opens it as the file "discard"; output() writes to it. */
FILE *discard;
/* Byte type used for all dictionary text so that bytes >= 0x80 stay
 * positive when combined by kanji_code(). */
typedef unsigned char uchar_t;
/* Values stored in state[] by read_certain_file(): main() passes CERTAIN
 * for file1 ("certainly memorized") and UNCERTAIN for file2
 * ("uncertainly memorized"). */
#define CERTAIN 1
#define UNCERTAIN 2
/* Per-character flag table, indexed by kanji_code(...) - 0x8080.
 * Entries never written by read_certain_file() stay 0. */
char state[32000];
/* Combine the first two bytes of s into one integer, first byte in the
 * high position.  s is evaluated twice. */
#define kanji_code(s) ((s)[0]*256 + (s)[1])
/* The current input line with every <...> section removed; filled in by
 * main() and compared against in output(). */
uchar_t orig_kanji[50];
/*
 * Emit the dictionary entries for one annotated word, recursing over its
 * annotated characters.
 *
 *   header       written form accumulated so far (characters kept as-is)
 *   header_yomi  key accumulated so far
 *   str          unprocessed tail; each annotated character is two bytes
 *                immediately followed by "<reading>"
 *
 * When str contains no more '<', the entry is complete: if the key plus
 * str equals orig_kanji (i.e. the key is just the word itself) the word
 * is written to `discard`, otherwise "key written-form" is printed on
 * stdout.
 *
 * Otherwise the two bytes before '<' are looked up in state[]:
 *   0          -> the key continues with the reading
 *   CERTAIN    -> the key continues with the character itself
 *   otherwise  -> both variants are produced, reading first
 *
 * Malformed input ('<' without two preceding bytes, or without a closing
 * '>') and entries too long for the internal buffers are reported on
 * stderr and skipped instead of overrunning memory.
 */
void
output(uchar_t *header, uchar_t *header_yomi, uchar_t *str)
{
    enum { PART_MAX = 128 };
    uchar_t this_header[PART_MAX], kanji_yomi[PART_MAX];
    uchar_t new_header[PART_MAX], new_header_yomi[PART_MAX];
    uchar_t *delim, *rdelim, *this_trailer;
    size_t prefix_len, yomi_len;
    unsigned code;
    int flags, n;

    delim = (uchar_t *)strchr((char *)str, '<');
    if (!delim) {
        /* Compare header_yomi + str against orig_kanji without building
         * the concatenation in a fixed-size buffer. */
        size_t key_len = strlen((char *)header_yomi);

        if (strncmp((char *)orig_kanji, (char *)header_yomi, key_len) == 0
            && strcmp((char *)orig_kanji + key_len, (char *)str) == 0) {
            fprintf(discard, "%s\n", orig_kanji);
        } else {
            printf("%s%s %s%s\n", header_yomi, str, header, str);
        }
        return;
    }

    /* Look for the closing bracket after the opening one, never before. */
    rdelim = (uchar_t *)strchr((char *)delim, '>');
    if (delim - str < 2 || !rdelim) {
        fprintf(stderr, "ignoring malformed entry %s%s.\n", header, str);
        return;
    }

    prefix_len = (size_t)(delim - str) - 2;   /* text before the character */
    yomi_len = (size_t)(rdelim - delim) - 1;  /* text between '<' and '>' */
    if (prefix_len >= sizeof this_header || yomi_len >= sizeof kanji_yomi) {
        fprintf(stderr, "ignoring overlong entry %s%s.\n", header, str);
        return;
    }
    memcpy(this_header, str, prefix_len);
    this_header[prefix_len] = '\0';
    memcpy(kanji_yomi, delim + 1, yomi_len);
    kanji_yomi[yomi_len] = '\0';
    this_trailer = rdelim + 1;

    /* Codes outside the table are treated like unlisted characters. */
    code = kanji_code(delim - 2);
    flags = 0;
    if (code >= 0x8080 && code - 0x8080 < sizeof state)
        flags = state[code - 0x8080];

    n = snprintf((char *)new_header, sizeof new_header, "%s%s%c%c",
                 header, this_header, delim[-2], delim[-1]);
    if (n < 0 || (size_t)n >= sizeof new_header) {
        fprintf(stderr, "ignoring overlong entry %s%s.\n", header, str);
        return;
    }

    if (!(flags & CERTAIN)) {
        /* Unlisted or uncertain: key uses the reading. */
        n = snprintf((char *)new_header_yomi, sizeof new_header_yomi,
                     "%s%s%s", header_yomi, this_header, kanji_yomi);
        if (n < 0 || (size_t)n >= sizeof new_header_yomi)
            fprintf(stderr, "ignoring overlong entry %s%s.\n", header, str);
        else
            output(new_header, new_header_yomi, this_trailer);
    }
    if (flags != 0) {
        /* Certain or uncertain: key uses the character itself. */
        n = snprintf((char *)new_header_yomi, sizeof new_header_yomi,
                     "%s%s%c%c", header_yomi, this_header,
                     delim[-2], delim[-1]);
        if (n < 0 || (size_t)n >= sizeof new_header_yomi)
            fprintf(stderr, "ignoring overlong entry %s%s.\n", header, str);
        else
            output(new_header, new_header_yomi, this_trailer);
    }
}
/*
 * Read one line from f and return the code of its fourth field.
 *
 * A line is expected to hold four whitespace-separated fields: a decimal
 * integer, a token, a hexadecimal integer and a token.  Only the last
 * token is used; its first two bytes are combined with kanji_code().
 *
 * Returns 0 at end of file (or on a read error) and (unsigned)-1 when the
 * line does not contain all four fields.
 */
unsigned
read_next_code(FILE *f)
{
    /* Token buffers are as large as the line buffer, so no token taken
     * from buf can overflow them. */
    char buf[BUFSIZ];
    char stroke[BUFSIZ];
    uchar_t kanji[BUFSIZ];
    int dummy1;
    unsigned dummy2;   /* %x requires an unsigned target */

    if (!fgets(buf, sizeof buf, f))
        return 0;
    /* The original test `!sscanf(...) == 4` was always false because `!`
     * binds tighter than `==`; compare the field count directly. */
    if (sscanf(buf, "%d %s %x %s", &dummy1, stroke, &dummy2,
               (char *)kanji) != 4)
        return -1;
    return kanji_code(kanji);
}
void
read_certain_file(path, val)
char *path;
int val;
{
FILE *f;
int linenum;
char buf[BUFSIZ];
if (!(f = fopen(path, "r"))) {
perror(path);
exit(1);
}
linenum = 0;
while (fgets(buf, BUFSIZ, f)) {
uchar_t *s = strtok(buf, " \t\n");
unsigned code = s ? kanji_code(s) : 0;
linenum++;
if (!(code >= 0x8080 && code <= 0xffff)) {
fprintf(stderr, "%s:%d:ignoring %s.\n", path, linenum, s);
} else {
state[code-0x8080] = val;
}
while (s = strtok(NULL, " \t\n")) {
code = kanji_code(s);
if (!(code >= 0x8080 && code <= 0xffff)) {
fprintf(stderr, "%s:%d:ignoring %s.\n", path, linenum, s);
} else {
state[code-0x8080] = val;
}
}
}
fclose(f);
}
int
main (argc, argv)
int argc;
char **argv;
{
FILE *f;
uchar_t buf[BUFSIZ];
unsigned code;
int line;
if (argc < 3) {
fprintf(stderr, "reduce dictionary\n");
fprintf(stderr, "Usage: reduce file1 file2 <dictionary\n");
fprintf(stderr, " file1: certainly memorized t-code\n");
fprintf(stderr, " file2: uncertainly memorized t-code\n");
exit(1);
}
discard = fopen("discard", "w");
assert(discard);
read_certain_file(argv[1], CERTAIN);
read_certain_file(argv[2], UNCERTAIN);
f = stdin;
while (fgets(buf, BUFSIZ, f)) {
uchar_t *delim;
uchar_t header[30];
uchar_t *dest = orig_kanji, *src = buf;
buf[strlen(buf)-1] = '\0';
while (*src) {
if (*src == '<') {
while (*src != '>') src++;
src++;
}
*dest++ = *src++;
}
*dest = 0;
delim = strchr(buf, '<');
assert(delim >= buf+2); /* '<'$B$NA0$K$OA43Q0lJ8;z0J>e$J$-$c$$$1$J$$(B */
memset(header, 0, sizeof header);
strncpy(header, buf, (delim - buf - 2));
output(header, header, delim-2);
}
exit(0);
}
|