1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
|
/*
Copyright (c) 2016 Corinna Vinschen <corinna@vinschen.de>
Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling
*/
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <wctype.h>
#include <stdint.h>
//#include <errno.h>
#include "local.h"
/*
   struct caseconv_entry describes the case conversion behaviour
   of a range of Unicode characters.
   It was designed to be compact for a minimal table size.
   The range covers the code points first ... first + diff (inclusive).
   Conversion behaviour for a character c in the respective range:
     mode == TOLO      towlower (c) = c + delta
     mode == TOUP      towupper (c) = c + delta
     mode == TOBOTH    (titling case characters)
                       towlower (c) = c + 1
                       towupper (c) = c - 1
     mode == TO1       capital/small letters are alternating
       delta == EVENCAP    even codes are capital
       delta == ODDCAP     odd codes are capital
       (this correlates with an even/odd first range value
       as of Unicode 10.0 but we do not rely on this)
   As of Unicode 10.0, the following field lengths are sufficient:
     first: 17 bits
     diff:   8 bits
     delta: 17 bits
     mode:   2 bits
   The reserve of 4 bits (to limit the struct to 6 bytes)
   is currently added to the 'first' field;
   should a future Unicode version make it necessary to expand the others,
   the 'first' field could be reduced as needed, or larger ranges could
   be split up (reduce limit max=255 e.g. to max=127 or max=63 in
   script mkcaseconv, check increasing table size).
*/
/* Conversion modes, stored in the 2-bit 'mode' field of a table entry.  */
enum
{
  TO1 = 0,	/* capital and small letters alternate within the range */
  TOLO = 1,	/* towlower (c) = c + delta */
  TOUP = 2,	/* towupper (c) = c + delta */
  TOBOTH = 3	/* titling case: towlower (c) = c + 1, towupper (c) = c - 1 */
};

/* For mode TO1 the 'delta' field holds one of these instead of an offset.  */
enum
{
  EVENCAP = 0,	/* even codes are capital */
  ODDCAP = 1	/* odd codes are capital */
};
/* One row of the case conversion table: the run of code points
   first ... first + diff, all sharing the rule given by mode and delta.
   The field widths add up to 48 bits so that a packed entry fits in 6 bytes.
   The table is only ever read, so it is declared const; this lets the
   toolchain place it in read-only storage instead of writable data.  */
static const struct caseconv_entry {
  uint_least32_t first: 21;	/* first code point of the range */
  uint_least32_t diff: 8;	/* last code point minus first */
  uint_least32_t mode: 2;	/* TO1, TOLO, TOUP or TOBOTH */
  int_least32_t delta: 17;	/* signed offset; EVENCAP/ODDCAP for TO1 */
}
#ifdef _HAVE_BITFIELDS_IN_PACKED_STRUCTS
__attribute__((packed))
#endif
caseconv_table [] = {
#include "caseconv.t"
};
/* Lower and upper bound (both inclusive) of the code point range covered
   by a table entry, as wint_t.  The argument is parenthesized so that any
   expression designating an entry, e.g. *ptr, expands correctly.  */
#define first(ce) ((wint_t) (ce).first)
#define last(ce) ((wint_t) ((ce).first + (ce).diff))
/* Binary search for the table entry whose range contains UCS.

   ucs    code point to look up
   table  array of entries; the search assumes the ranges are sorted in
          ascending order and do not overlap
   max    index of the last entry in TABLE (number of entries minus one)

   Returns a pointer to the matching entry, or a null pointer if no range
   contains UCS.  */
static const struct caseconv_entry *
bisearch (wint_t ucs, const struct caseconv_entry *table, int max)
{
  int min = 0;

  /* An empty table (max < 0) has no table[0] or table[max] to inspect.  */
  if (max < 0)
    return 0;

  /* Quick reject for anything outside the span of the whole table.  */
  if (ucs < first(table[0]) || ucs > last(table[max]))
    return 0;

  while (min <= max)
    {
      /* Same value as (min + max) / 2, but cannot overflow int.  */
      int mid = min + (max - min) / 2;

      if (ucs > last(table[mid]))
	min = mid + 1;
      else if (ucs < first(table[mid]))
	max = mid - 1;
      else
	return &table[mid];
    }

  /* UCS falls into a gap between two ranges.  */
  return 0;
}
/* Map the code point C to lower case using caseconv_table.
   Returns C unchanged when no table range contains it or when the
   matching range defines no lower-case conversion for C.  */
static wint_t
toulower (wint_t c)
{
  const struct caseconv_entry *entry =
    bisearch (c, caseconv_table,
	      sizeof caseconv_table / sizeof caseconv_table[0] - 1);

  if (!entry)
    return c;

  if (entry->mode == TOLO)
    return c + entry->delta;

  /* Titling case character: the small letter is the next code point.  */
  if (entry->mode == TOBOTH)
    return c + 1;

  if (entry->mode == TO1)
    {
      /* Alternating capital/small pairs: only a capital is converted,
	 and which parity is the capital is recorded in delta.  */
      int is_odd = (c & 1) != 0;

      if (entry->delta == EVENCAP && !is_odd)
	return c + 1;
      if (entry->delta == ODDCAP && is_odd)
	return c + 1;
    }

  return c;
}
/* Map the code point C to upper case using caseconv_table.
   Returns C unchanged when no table range contains it or when the
   matching range defines no upper-case conversion for C.  */
static wint_t
touupper (wint_t c)
{
  const struct caseconv_entry *entry =
    bisearch (c, caseconv_table,
	      sizeof caseconv_table / sizeof caseconv_table[0] - 1);

  if (!entry)
    return c;

  if (entry->mode == TOUP)
    return c + entry->delta;

  /* Titling case character: the capital is the previous code point.  */
  if (entry->mode == TOBOTH)
    return c - 1;

  if (entry->mode == TO1)
    {
      /* Alternating capital/small pairs: only a small letter is converted,
	 and which parity is the capital is recorded in delta.  */
      int is_odd = (c & 1) != 0;

      if (entry->delta == EVENCAP && is_odd)
	return c - 1;
      if (entry->delta == ODDCAP && !is_odd)
	return c - 1;
    }

  return c;
}
/* Apply the character mapping W to the wide character C.

   c       wide character to convert
   w       mapping selector; only WCT_TOLOWER and WCT_TOUPPER are handled
   locale  handed to _jp2uc_l / _uc2jp_l when _MB_CAPABLE is defined,
           otherwise unused

   Returns the converted character, or C itself when W is not a handled
   mapping or the conversion leaves the character unchanged.  */
wint_t
towctrans_l (wint_t c, wctrans_t w, struct __locale_t *locale)
{
  wint_t ucs;
  wint_t mapped;

  /* Keeps the parameter "used" in builds without _MB_CAPABLE.  */
  (void) locale;

  /* NOTE(review): _jp2uc_l presumably converts C from the locale's wide
     character encoding to Unicode, judging by its name - confirm.  */
#ifdef _MB_CAPABLE
  ucs = _jp2uc_l (c, locale);
#else
  ucs = c;
#endif

  if (w == WCT_TOLOWER)
    mapped = toulower (ucs);
  else if (w == WCT_TOUPPER)
    mapped = touupper (ucs);
  else
    {
      // skipping the errno setting that was previously involved
      // by delegating to towctrans; it was causing trouble (cygwin crash)
      // and there is no errno specified for towctrans
      return c;
    }

  /* Nothing changed: hand back the caller's original value rather than
     a round-tripped one.  */
  if (mapped == ucs)
    return c;

#ifdef _MB_CAPABLE
  return _uc2jp_l (mapped, locale);
#else
  return mapped;
#endif
}
|