1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
|
/*
* wchar_t helpers, version CPython >= 3.3.
*
* CPython 3.3 added support for sys.maxunicode == 0x10FFFF on all
* platforms, even ones with wchar_t limited to 2 bytes. As such,
* this code here works from the outside like wchar_helper.h in the
* case Py_UNICODE_SIZE == 4, but the implementation is very different.
*/
typedef uint16_t cffi_char16_t;
typedef uint32_t cffi_char32_t;
static PyObject *
_my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
{
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, w, size);
}
static PyObject *
_my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
{
/* are there any surrogate pairs, and if so, how many? */
Py_ssize_t i, count_surrogates = 0;
for (i = 0; i < size - 1; i++) {
if (0xD800 <= w[i] && w[i] <= 0xDBFF &&
0xDC00 <= w[i+1] && w[i+1] <= 0xDFFF)
count_surrogates++;
}
if (count_surrogates == 0) {
/* no, fast path */
return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, w, size);
}
else
{
PyObject *result = PyUnicode_New(size - count_surrogates, 0x10FFFF);
Py_UCS4 *data;
assert(PyUnicode_KIND(result) == PyUnicode_4BYTE_KIND);
data = PyUnicode_4BYTE_DATA(result);
for (i = 0; i < size; i++)
{
cffi_char32_t ch = w[i];
if (0xD800 <= ch && ch <= 0xDBFF && i < size - 1) {
cffi_char32_t ch2 = w[i + 1];
if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
i++;
}
}
*data++ = ch;
}
return result;
}
}
static int
_my_PyUnicode_AsSingleChar16(PyObject *unicode, cffi_char16_t *result,
char *err_got)
{
cffi_char32_t ch;
if (PyUnicode_GET_LENGTH(unicode) != 1) {
sprintf(err_got, "unicode string of length %zd",
PyUnicode_GET_LENGTH(unicode));
return -1;
}
ch = PyUnicode_READ_CHAR(unicode, 0);
if (ch > 0xFFFF)
{
sprintf(err_got, "larger-than-0xFFFF character");
return -1;
}
*result = (cffi_char16_t)ch;
return 0;
}
static int
_my_PyUnicode_AsSingleChar32(PyObject *unicode, cffi_char32_t *result,
char *err_got)
{
if (PyUnicode_GET_LENGTH(unicode) != 1) {
sprintf(err_got, "unicode string of length %zd",
PyUnicode_GET_LENGTH(unicode));
return -1;
}
*result = PyUnicode_READ_CHAR(unicode, 0);
return 0;
}
static Py_ssize_t _my_PyUnicode_SizeAsChar16(PyObject *unicode)
{
Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
Py_ssize_t result = length;
unsigned int kind = PyUnicode_KIND(unicode);
if (kind == PyUnicode_4BYTE_KIND)
{
Py_UCS4 *data = PyUnicode_4BYTE_DATA(unicode);
Py_ssize_t i;
for (i = 0; i < length; i++) {
if (data[i] > 0xFFFF)
result++;
}
}
return result;
}
static Py_ssize_t _my_PyUnicode_SizeAsChar32(PyObject *unicode)
{
return PyUnicode_GET_LENGTH(unicode);
}
static int _my_PyUnicode_AsChar16(PyObject *unicode,
cffi_char16_t *result,
Py_ssize_t resultlen)
{
Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
unsigned int kind = PyUnicode_KIND(unicode);
void *data = PyUnicode_DATA(unicode);
Py_ssize_t i;
for (i = 0; i < len; i++) {
cffi_char32_t ordinal = PyUnicode_READ(kind, data, i);
if (ordinal > 0xFFFF) {
if (ordinal > 0x10FFFF) {
PyErr_Format(PyExc_ValueError,
"unicode character out of range for "
"conversion to char16_t: 0x%x", (int)ordinal);
return -1;
}
ordinal -= 0x10000;
*result++ = 0xD800 | (ordinal >> 10);
*result++ = 0xDC00 | (ordinal & 0x3FF);
}
else
*result++ = ordinal;
}
return 0;
}
static int _my_PyUnicode_AsChar32(PyObject *unicode,
cffi_char32_t *result,
Py_ssize_t resultlen)
{
if (PyUnicode_AsUCS4(unicode, (Py_UCS4 *)result, resultlen, 0) == NULL)
return -1;
return 0;
}
|