File: interp_unicode.py

package info (click to toggle)
pypy3 7.3.19%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 212,236 kB
  • sloc: python: 2,098,316; ansic: 540,565; sh: 21,462; asm: 14,419; cpp: 4,451; makefile: 4,209; objc: 761; xml: 530; exp: 499; javascript: 314; pascal: 244; lisp: 45; csh: 12; awk: 4
file content (171 lines) | stat: -rw-r--r-- 7,301 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.rlib import rutf8
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.unicodehelper import wcharpsize2utf8
from pypy.objspace.std import unicodeobject
from pypy.module._hpy_universal.apiset import API

def _maybe_utf8_to_w(space, utf8):
    # should this be a method of space?
    s = rffi.constcharp2str(utf8)
    try:
        length = rutf8.check_utf8(s, allow_surrogates=False)
    except rutf8.CheckError:
        raise   # XXX do something
    return space.newtext(s, length)

@API.func("int HPyUnicode_Check(HPyContext *ctx, HPy h)", error_value=API.int(-1))
def HPyUnicode_Check(space, handles, ctx, h):
    w_obj = handles.deref(h)
    w_obj_type = space.type(w_obj)
    res = (space.is_w(w_obj_type, space.w_unicode) or
           space.issubtype_w(w_obj_type, space.w_unicode))
    return API.int(res)

@API.func("HPy HPyUnicode_FromString(HPyContext *ctx, const char *utf8)")
def HPyUnicode_FromString(space, handles, ctx, utf8):
    w_obj = _maybe_utf8_to_w(space, utf8)
    return handles.new(w_obj)

@API.func("HPy HPyUnicode_AsUTF8String(HPyContext *ctx, HPy h)")
def HPyUnicode_AsUTF8String(space, handles, ctx, h):
    w_unicode = handles.deref(h)
    # XXX: what should we do if w_unicode is not a str?
    w_bytes = unicodeobject.encode_object(space, w_unicode, 'utf-8', 'strict')
    return handles.new(w_bytes)

@API.func("HPy HPyUnicode_AsASCIIString(HPyContext *ctx, HPy h)")
def HPyUnicode_AsASCIIString(space, handles, ctx, h):
    w_unicode = handles.deref(h)
    w_bytes = unicodeobject.encode_object(space, w_unicode, 'ascii', 'strict')
    return handles.new(w_bytes)

@API.func("HPy HPyUnicode_AsLatin1String(HPyContext *ctx, HPy h)")
def HPyUnicode_AsLatin1String(space, handles, ctx, h):
    w_unicode = handles.deref(h)
    w_bytes = unicodeobject.encode_object(space, w_unicode, 'latin1', 'strict')
    return handles.new(w_bytes)

@API.func("HPy HPyUnicode_EncodeFSDefault(HPyContext *ctx, HPy h)")
def HPyUnicode_EncodeFSDefault(space, handles, ctx, h):
    w_unicode = handles.deref(h)
    w_bytes = space.fsencode(w_unicode)
    return handles.new(w_bytes)
 
@API.func("const char *HPyUnicode_AsUTF8AndSize(HPyContext *ctx, HPy h, HPy_ssize_t *size)")
def HPyUnicode_AsUTF8AndSize(space, handles, ctx, h, size):
    w_unicode = handles.deref(h)
    # XXX: what should we do if w_unicode is not a str?
    s = space.utf8_w(w_unicode)
    if size:
        size[0] = len(s)
    res = handles.str2ownedptr(s, owner=h)
    return rffi.cast(rffi.CONST_CCHARP, res)

@API.func("HPy HPyUnicode_FromWideChar(HPyContext *ctx, const wchar_t *w, HPy_ssize_t size)")
def HPyUnicode_FromWideChar(space, handles, ctx, wchar_p, size):
    # remove the "const", else we can't call wcharpsize2utf8 later
    wchar_p = rffi.cast(rffi.CWCHARP, wchar_p)
    if wchar_p:
        if size == -1:
            size = wcharplen(wchar_p)
        # WRITE TEST: this automatically raises "character not in range", but
        # we don't have any test for it
        s = wcharpsize2utf8(space, wchar_p, size)
        w_obj = space.newutf8(s, size)
        return handles.new(w_obj)
    else:
        # cpyext returns an empty string, we need a test
        raise NotImplementedError("WRITE TEST")


def wcharplen(wchar_p):
    i = 0
    while ord(wchar_p[i]):
        i += 1
    return i

@API.func("HPy HPyUnicode_DecodeFSDefault(HPyContext *ctx, const char *v)")
def HPyUnicode_DecodeFSDefault(space, handles, ctx, v):
    w_bytes = space.newbytes(rffi.constcharp2str(v))
    w_decoded = space.fsdecode(w_bytes)
    return handles.new(w_decoded)

@API.func("HPy HPyUnicode_DecodeFSDefaultAndSize(HPyContext *ctx, const char *v, ssize_t size)")
def HPyUnicode_DecodeFSDefaultAndSize(space, handles, ctx, v, size):
    w_bytes = space.newbytes(rffi.constcharpsize2str(v, size))
    w_decoded = space.fsdecode(w_bytes)
    return handles.new(w_decoded)

@API.func("HPy HPyUnicode_DecodeASCII(HPyContext *ctx, const char *v, ssize_t size, const char * errors)")
def HPyUnicode_DecodeASCII(space, handles, ctx, v, size, errors):
    w_s = space.newbytes(rffi.constcharpsize2str(v, size))
    if errors:
            w_errors = space.newtext(rffi.constcharp2str(errors))
    else:
        w_errors = None
    w_decoded = space.call_method(w_s, 'decode', space.newtext('ascii'), w_errors)
    return handles.new(w_decoded)

@API.func("HPy HPyUnicode_DecodeLatin1(HPyContext *ctx, const char *v, ssize_t size, const char * errors)")
def HPyUnicode_DecodeLatin1(space, handles, ctx, v, size, errors):
    w_s = space.newbytes(rffi.constcharpsize2str(v, size))
    if errors:
            w_errors = space.newtext(rffi.constcharp2str(errors))
    else:
        w_errors = None
    w_decoded = space.call_method(w_s, 'decode', space.newtext('latin1'), w_errors)
    return handles.new(w_decoded)

# XXX fixme: should return HPy_UCS4, not long
@API.func("long HPyUnicode_ReadChar(HPyContext *ctx, HPy h, HPy_ssize_t index)",
          error_value=rffi.cast(rffi.LONG, -1))
def HPyUnicode_ReadChar(space, handles, ctx, h, index):
    w_unicode = handles.deref(h)
    if index < 0 or index > space.len_w(w_unicode):
        raise oefmt(space.w_IndexError, "string index out of range")
    w_ch = space.getitem(w_unicode, space.newint(index))
    return rffi.cast(rffi.LONG, space.int_w(space.ord(w_ch)))
    
@API.func("HPy HPyUnicode_Substring(HPyContext *ctx, HPy str, HPy_ssize_t start, HPy_ssize_t end)")
def HPyUnicode_Substring(space, handles, ctx, h, start, end):
    w_obj = handles.deref(h)
    if not space.isinstance_w(w_obj, space.w_text):
        raise oefmt(space.w_TypeError, "expected string object")
    if start == 0 and end == space.len_w(w_obj):
        return handles.new(w_obj)
    if start < 0 or end < 0:
        raise oefmt(space.w_IndexError, "string index out of range")
    w_result = space.call_method(w_obj, '__getitem__',
                         space.newslice(space.newint(start), space.newint(end),
                                        space.newint(1)))
    return handles.new(w_result)

@API.func("HPy HPyUnicode_FromEncodedObject(HPyContext *ctx, HPy obj, const char *encoding, const char *errors)")
def HPyUnicode_FromEncodedObject(space, handles, ctx, h, encoding, errors):
    if not h:
        raise oefmt(space.w_SystemError, "NULL handle passed to HPyUnicode_FromEncodedObject")
    w_obj = handles.deref(h)
    if space.isinstance_w(w_obj, space.w_bytes):
        s = space.bytes_w(w_obj)
        if not s:
            return handles.new(space.newtext(''))
    elif space.isinstance_w(w_obj, space.w_unicode):
        raise oefmt(space.w_TypeError, "decoding str is not supported")
    elif space.isinstance_w(w_obj, space.w_bytearray):   # Python 2.x specific
        raise oefmt(space.w_TypeError, "decoding bytearray is not supported")
    else:
        s = space.charbuf_w(w_obj)
    if encoding:
        w_encoding = space.newtext(rffi.constcharp2str(encoding))
    else:
        w_encoding = space.newtext('utf-8')
    w_str = space.newbytes(s)
    if errors:
        w_errors = space.newtext(rffi.constcharp2str(errors))
    else:
        w_errors = None
    w_result = space.call_method(w_str, 'decode', w_encoding, w_errors)
    return handles.new(w_result)