File: wchar_helper.h

package info (click to toggle)
python-cffi 0.8.6-1~bpo70%2B1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy-backports
  • size: 1,444 kB
  • sloc: python: 12,599; ansic: 6,343; asm: 116; makefile: 84; sh: 24
file content (131 lines) | stat: -rw-r--r-- 3,450 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
 * wchar_t helpers
 */

#if (Py_UNICODE_SIZE == 2) && (SIZEOF_WCHAR_T == 4)
# define CONVERT_WCHAR_TO_SURROGATES
#endif


#ifdef CONVERT_WCHAR_TO_SURROGATES

/* Before Python 2.7, PyUnicode_FromWideChar is not able to convert
   wchar_t values greater than 65535 into two-unicode-characters surrogates.
   But even the Python 2.7 version doesn't detect wchar_t values that are
   out of range(1114112), and just returns nonsense.
*/
static PyObject *
_my_PyUnicode_FromWideChar(register const wchar_t *w,
                           Py_ssize_t size)
{
    PyObject *unicode;
    register Py_ssize_t i;
    Py_ssize_t alloc;
    const wchar_t *orig_w;

    if (w == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    alloc = size;
    orig_w = w;
    for (i = size; i > 0; i--) {
        if (*w > 0xFFFF)
            alloc++;
        w++;
    }
    w = orig_w;
    unicode = PyUnicode_FromUnicode(NULL, alloc);
    if (!unicode)
        return NULL;

    /* Copy the wchar_t data into the new object */
    {
        register Py_UNICODE *u;
        u = PyUnicode_AS_UNICODE(unicode);
        for (i = size; i > 0; i--) {
            if (((unsigned int)*w) > 0xFFFF) {
                wchar_t ordinal;
                if (((unsigned int)*w) > 0x10FFFF) {
                    PyErr_Format(PyExc_ValueError,
                                 "wchar_t out of range for "
                                 "conversion to unicode: 0x%x", (int)*w);
                    Py_DECREF(unicode);
                    return NULL;
                }
                ordinal = *w++;
                ordinal -= 0x10000;
                *u++ = 0xD800 | (ordinal >> 10);
                *u++ = 0xDC00 | (ordinal & 0x3FF);
            }
            else
                *u++ = *w++;
        }
    }
    return unicode;
}

#else

# define _my_PyUnicode_FromWideChar PyUnicode_FromWideChar

#endif


#define IS_SURROGATE(u)   (0xD800 <= (u)[0] && (u)[0] <= 0xDBFF &&   \
                           0xDC00 <= (u)[1] && (u)[1] <= 0xDFFF)
#define AS_SURROGATE(u)   (0x10000 + (((u)[0] - 0xD800) << 10) +     \
                                     ((u)[1] - 0xDC00))

static int _my_PyUnicode_AsSingleWideChar(PyObject *unicode, wchar_t *result)
{
    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
    if (PyUnicode_GET_SIZE(unicode) == 1) {
        *result = (wchar_t)(u[0]);
        return 0;
    }
#ifdef CONVERT_WCHAR_TO_SURROGATES
    if (PyUnicode_GET_SIZE(unicode) == 2 && IS_SURROGATE(u)) {
        *result = AS_SURROGATE(u);
        return 0;
    }
#endif
    return -1;
}

static Py_ssize_t _my_PyUnicode_SizeAsWideChar(PyObject *unicode)
{
    Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
    Py_ssize_t result = length;

#ifdef CONVERT_WCHAR_TO_SURROGATES
    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
    Py_ssize_t i;

    for (i=0; i<length-1; i++) {
        if (IS_SURROGATE(u+i))
            result--;
    }
#endif
    return result;
}

static void _my_PyUnicode_AsWideChar(PyObject *unicode,
                                     wchar_t *result,
                                     Py_ssize_t resultlen)
{
    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
    Py_ssize_t i;
    for (i=0; i<resultlen; i++) {
        wchar_t ordinal = *u;
#ifdef CONVERT_WCHAR_TO_SURROGATES
        if (IS_SURROGATE(u)) {
            ordinal = AS_SURROGATE(u);
            u++;
        }
#endif
        result[i] = ordinal;
        u++;
    }
}