1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
|
/*
* getcwidth - Get the OS's idea of the width of Unicode codepoints
*
* This code is Copyright (c) 2013, by the authors of nmh. See the
* COPYRIGHT file in the root directory of the nmh distribution for
* complete copyright information.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef MULTIBYTE_SUPPORT
#include <locale.h>
#include <wchar.h>
#endif
#ifdef MULTIBYTE_SUPPORT
static void usage(char *);
static void dumpwidth(void);
static void getwidth(const char *);
#endif /* MULTIBYTE_SUPPORT */
int
main(int argc, char *argv[])
{
#ifndef MULTIBYTE_SUPPORT
(void) argc;
(void) argv;
fprintf(stderr, "Nmh was not configured with multibyte support\n");
exit(1);
#else /* MULTIBYTE_SUPPORT */
wchar_t c;
int i;
setlocale(LC_ALL, "");
if (argc < 2)
usage(argv[0]);
if (strcmp(argv[1], "--dump") == 0) {
if (argc == 2) {
dumpwidth();
exit(0);
} else {
fprintf(stderr, "--dump cannot be combined with "
"other arguments\n");
exit(1);
}
}
/*
* Process each argument. If it begins with "U+", then try to
* convert it to a Unicode codepoint. Otherwise, take each
* string and get the total width
*/
for (i = 1; i < argc; i++) {
if (strncmp(argv[i], "U+", 2) == 0) {
/*
* We're making a big assumption here that
* wchar_t represents a Unicode codepoint.
* That technically isn't valid unless the
* C compiler defines __STDC_ISO_10646__, but
* we're going to assume now that it works.
*/
errno = 0;
c = strtoul(argv[i] + 2, NULL, 16);
if (errno) {
fprintf(stderr, "Codepoint %s invalid\n",
argv[i]);
continue;
}
printf("%d\n", wcwidth(c));
} else {
getwidth(argv[i]);
}
}
exit(0);
}
static void
usage(char *argv0)
{
fprintf(stderr, "Usage: %s [--dump]\n", argv0);
fprintf(stderr, " %s U+XXXX [...]\n", argv0);
fprintf(stderr, " %s utf-8-sequence [...]\n", argv0);
fprintf(stderr, "Returns the column width of a Unicode codepoint "
"or UTF-8 character sequence\n");
fprintf(stderr, "\t--dump\tDump complete width table\n");
exit(1);
}
static void
getwidth(const char *string)
{
wchar_t c;
int charlen, charleft = strlen(string);
int length = 0;
/*
* In theory we should be able to use wcswidth(), but since we're
* testing out how the format libraries behave we'll do it a character
* at a time.
*/
mbtowc(NULL, NULL, 0);
while (charleft > 0) {
int clen;
charlen = mbtowc(&c, string, charleft);
if (charlen == 0)
break;
if (charlen < 0) {
fprintf(stderr, "Unable to convert string \"%s\"\n",
string);
return;
}
if ((clen = wcwidth(c)) < 0) {
fprintf(stderr, "U+%04lX non-printable\n",
(unsigned long int) c);
return;
}
length += clen;
string += charlen;
charleft -= charlen;
}
printf("%d\n", length);
}
static void
dumpwidth(void)
{
wchar_t wc, low;
int width, lastwidth;
for (wc = 0, low = 1, lastwidth = wcwidth(1); wc < 0xffff; wc++) {
width = wcwidth(wc+1);
if (width != lastwidth) {
printf("%04lX - %04lX = %d\n", (unsigned long int) low,
(unsigned long int) (wc), lastwidth);
low = wc+1;
}
lastwidth = width;
}
width = wcwidth(wc);
if (width == lastwidth)
printf("%04lX - %04lX = %d\n", (unsigned long int) low,
(unsigned long int) (wc), width);
#endif /* MULTIBYTE_SUPPORT */
}
|