1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
|
/*
* cstable.c - libcharset supporting utility which draws up a map
* of Unicode code points U+0000 to U+2FFFF (the BMP and the two
* planes above it) and annotates it with details of which
* other character sets each character appears in.
*
* Note this is not a libcharset _client_; it is part of the
* libcharset _package_, using libcharset internals.
*/
#include <stdio.h>
#include <string.h>
#include "charset.h"
#include "internal.h"
#include "sbcsdat.h"
/*
 * Declare the charset_spec objects this program refers to. enum.c
 * is included with ENUM_CHARSET defined so that each
 * ENUM_CHARSET(x) it contains expands to an extern declaration of
 * charset_x. (Presumably enum.c holds one such line per supported
 * charset - confirm against enum.c.)
 */
#define ENUM_CHARSET(x) extern charset_spec const charset_##x;
#include "enum.c"
#undef ENUM_CHARSET
/*
 * Table of pointers to the charset_spec objects: each
 * ENUM_CHARSET(x) in enum.c contributes the entry &charset_x, so
 * the entries appear in enum.c's order.
 */
static charset_spec const *const cs_table[] = {
#define ENUM_CHARSET(x) &charset_##x,
#include "enum.c"
#undef ENUM_CHARSET
};
/*
 * Table of charset identifiers as strings: each ENUM_CHARSET(x) in
 * enum.c contributes the stringified name "x". Being generated from
 * the same include as cs_table, entry i here names entry i there;
 * main() prints these names when the -i option is given.
 */
static const char *const cs_names[] = {
#define ENUM_CHARSET(x) #x,
#include "enum.c"
#undef ENUM_CHARSET
};
/*
 * Print one line for every code point from U+0000 up to U+2FFFF,
 * listing the character sets in which that code point was found.
 *
 * Command-line options (any other argument is silently ignored):
 *   -i   name each SBCS by its internal identifier (the cs_names
 *        entry) instead of by charset_to_localenc()
 *   -v   directly after each charset name, also print the value(s)
 *        the lookup produced, in square brackets
 *
 * Each output line has the form "U+xxxx: name; name; ..."; a code
 * point found in none of the charsets is printed as
 * "U+xxxx: unicode-only".
 *
 * Always returns 0.
 */
int main(int argc, char **argv)
{
    long int c;
    int internal_names = FALSE;
    int verbose = FALSE;

    while (--argc) {
        char *p = *++argv;
        if (!strcmp(p, "-i")) {
            internal_names = TRUE;
        } else if (!strcmp(p, "-v")) {
            verbose = TRUE;
        }
    }

    for (c = 0; c < 0x30000; c++) {
        int i, plane, row, col, chr;
        /*
         * sep is empty until the first charset has been printed for
         * this code point and ";" afterwards, so it doubles as the
         * "found anything yet?" flag tested at the end of the loop.
         */
        char const *sep = "";

        /*
         * c is a long, so it needs the `l' length modifier; %lx
         * takes an unsigned long, hence the cast (c is never
         * negative here).
         */
        printf("U+%04lx:", (unsigned long)c);

        /*
         * Look up in SBCSes. Only table entries whose read function
         * is read_sbcs are consulted; a result other than ERROR
         * counts as a hit.
         */
        for (i = 0; i < lenof(cs_table); i++) {
            if (cs_table[i]->read == read_sbcs &&
                (chr = sbcs_from_unicode(cs_table[i]->data, c)) != ERROR) {
                printf("%s %s", sep,
                       (internal_names ? cs_names[i] :
                        charset_to_localenc(cs_table[i]->charset)));
                if (verbose) {
                    printf("[%d]", chr);
                }
                sep = ";";
            }
        }

        /*
         * Look up individually in MBCS base charsets. The
         * `internal_names' flag does not affect these, because
         * MBCS base charsets aren't directly encoded by CS_*
         * constants. A nonzero return from each lookup is treated
         * as a hit.
         */
        if (unicode_to_big5(c, &row, &col)) {
            printf("%s Big5", sep);
            if (verbose) {
                printf("[%d,%d]", row, col);
            }
            sep = ";";
        }
        if (unicode_to_gb2312(c, &row, &col)) {
            printf("%s GB2312", sep);
            if (verbose) {
                printf("[%d,%d]", row, col);
            }
            sep = ";";
        }
        if (unicode_to_jisx0208(c, &row, &col)) {
            printf("%s JIS X 0208", sep);
            if (verbose) {
                printf("[%d,%d]", row, col);
            }
            sep = ";";
        }
        if (unicode_to_ksx1001(c, &row, &col)) {
            printf("%s KS X 1001", sep);
            if (verbose) {
                printf("[%d,%d]", row, col);
            }
            sep = ";";
        }
        if (unicode_to_cp949(c, &row, &col)) {
            printf("%s CP949", sep);
            if (verbose) {
                printf("[%d,%d]", row, col);
            }
            sep = ";";
        }
        if (unicode_to_cns11643(c, &plane, &row, &col)) {
            printf("%s CNS11643", sep);
            if (verbose) {
                printf("[%d,%d,%d]", plane, row, col);
            }
            sep = ";";
        }

        /* Nothing matched: sep was never advanced from "". */
        if (!*sep) {
            printf(" unicode-only");
        }
        printf("\n");
    }

    return 0;
}
|