File: interp_unicode.py

package info (click to toggle)
pypy3 7.3.11%2Bdfsg-2%2Bdeb12u3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 201,024 kB
  • sloc: python: 1,950,308; ansic: 517,580; sh: 21,417; asm: 14,419; cpp: 4,263; makefile: 4,228; objc: 761; xml: 530; exp: 499; javascript: 314; pascal: 244; lisp: 45; csh: 11; awk: 4
file content (129 lines) | stat: -rw-r--r-- 5,395 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.rlib import rutf8
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.unicodehelper import wcharpsize2utf8
from pypy.objspace.std import unicodeobject
from pypy.module._hpy_universal.apiset import API

def _maybe_utf8_to_w(space, utf8):
    """Build an app-level text object from the C string `utf8`.

    The bytes are validated with rutf8.check_utf8 (surrogates are not
    allowed); the length it returns is handed to space.newtext together
    with the string.  A rutf8.CheckError is currently propagated unchanged.
    """
    # should this be a method of space?
    raw = rffi.constcharp2str(utf8)
    try:
        n_codepoints = rutf8.check_utf8(raw, allow_surrogates=False)
    except rutf8.CheckError:
        # XXX do something: for now the low-level error is simply re-raised
        raise
    return space.newtext(raw, n_codepoints)

@API.func("int HPyUnicode_Check(HPyContext *ctx, HPy h)", error_value=API.int(-1))
def HPyUnicode_Check(space, handles, ctx, h):
    """Tell whether the object behind `h` is a str or an instance of a
    str subtype; the boolean answer is returned wrapped by API.int."""
    w_type = space.type(handles.deref(h))
    # exact type match first, subtype check only when that fails
    is_str = space.is_w(w_type, space.w_unicode)
    if not is_str:
        is_str = space.issubtype_w(w_type, space.w_unicode)
    return API.int(is_str)

@API.func("HPy HPyUnicode_FromString(HPyContext *ctx, const char *utf8)")
def HPyUnicode_FromString(space, handles, ctx, utf8):
    """Return a new handle to the text object built from the C string
    `utf8` by _maybe_utf8_to_w."""
    return handles.new(_maybe_utf8_to_w(space, utf8))

@API.func("HPy HPyUnicode_AsUTF8String(HPyContext *ctx, HPy h)")
def HPyUnicode_AsUTF8String(space, handles, ctx, h):
    """Encode the object behind `h` with the 'utf-8' codec ('strict'
    error handling) and return a new handle to the result."""
    # XXX: what should we do if w_unicode is not a str?
    w_encoded = unicodeobject.encode_object(
        space, handles.deref(h), 'utf-8', 'strict')
    return handles.new(w_encoded)

@API.func("HPy HPyUnicode_AsASCIIString(HPyContext *ctx, HPy h)")
def HPyUnicode_AsASCIIString(space, handles, ctx, h):
    """Encode the object behind `h` with the 'ascii' codec ('strict'
    error handling) and return a new handle to the result."""
    w_encoded = unicodeobject.encode_object(
        space, handles.deref(h), 'ascii', 'strict')
    return handles.new(w_encoded)

@API.func("HPy HPyUnicode_AsLatin1String(HPyContext *ctx, HPy h)")
def HPyUnicode_AsLatin1String(space, handles, ctx, h):
    """Encode the object behind `h` with the 'latin1' codec ('strict'
    error handling) and return a new handle to the result."""
    w_encoded = unicodeobject.encode_object(
        space, handles.deref(h), 'latin1', 'strict')
    return handles.new(w_encoded)

@API.func("HPy HPyUnicode_EncodeFSDefault(HPyContext *ctx, HPy h)")
def HPyUnicode_EncodeFSDefault(space, handles, ctx, h):
    """Pass the object behind `h` through space.fsencode and return a new
    handle to the result."""
    return handles.new(space.fsencode(handles.deref(h)))
 
@API.func("const char *HPyUnicode_AsUTF8AndSize(HPyContext *ctx, HPy h, HPy_ssize_t *size)")
def HPyUnicode_AsUTF8AndSize(space, handles, ctx, h, size):
    """Return a `const char *` to the UTF-8 form of the object behind `h`.

    When `size` is a non-NULL pointer, the length of the UTF-8 string is
    written to size[0].  The buffer comes from handles.str2ownedptr with
    `h` given as its owner.
    """
    # XXX: what should we do if w_unicode is not a str?
    utf8 = space.utf8_w(handles.deref(h))
    if size:
        # the out-parameter is optional
        size[0] = len(utf8)
    owned_ptr = handles.str2ownedptr(utf8, owner=h)
    return rffi.cast(rffi.CONST_CCHARP, owned_ptr)

@API.func("HPy HPyUnicode_FromWideChar(HPyContext *ctx, const wchar_t *w, HPy_ssize_t size)")
def HPyUnicode_FromWideChar(space, handles, ctx, wchar_p, size):
    """Create a text object from `size` wide characters at `wchar_p`.

    A `size` of -1 means the length is computed with wcharplen().  A NULL
    `wchar_p` is not supported yet and raises NotImplementedError.
    """
    # remove the "const", else we can't call wcharpsize2utf8 later
    wchar_p = rffi.cast(rffi.CWCHARP, wchar_p)
    if not wchar_p:
        # cpyext returns an empty string, we need a test
        raise NotImplementedError("WRITE TEST")
    if size == -1:
        size = wcharplen(wchar_p)
    # WRITE TEST: this automatically raises "character not in range", but
    # we don't have any test for it
    utf8 = wcharpsize2utf8(space, wchar_p, size)
    return handles.new(space.newutf8(utf8, size))


def wcharplen(wchar_p):
    """Return the number of characters in `wchar_p` that precede the first
    character whose ordinal is 0."""
    length = 0
    while True:
        if ord(wchar_p[length]) == 0:
            return length
        length += 1

@API.func("HPy HPyUnicode_DecodeFSDefault(HPyContext *ctx, const char *v)")
def HPyUnicode_DecodeFSDefault(space, handles, ctx, v):
    """Wrap the C string `v` as bytes, pass it through space.fsdecode and
    return a new handle to the result."""
    raw = rffi.constcharp2str(v)
    return handles.new(space.fsdecode(space.newbytes(raw)))

@API.func("HPy HPyUnicode_DecodeFSDefaultAndSize(HPyContext *ctx, const char *v, ssize_t size)")
def HPyUnicode_DecodeFSDefaultAndSize(space, handles, ctx, v, size):
    """Wrap `size` chars starting at `v` as bytes, pass them through
    space.fsdecode and return a new handle to the result."""
    raw = rffi.constcharpsize2str(v, size)
    return handles.new(space.fsdecode(space.newbytes(raw)))

@API.func("HPy HPyUnicode_DecodeASCII(HPyContext *ctx, const char *v, ssize_t size, const char * errors)")
def HPyUnicode_DecodeASCII(space, handles, ctx, v, size, errors):
    """Decode `size` chars starting at `v` with the 'ascii' codec.

    `errors` is an optional C string naming the error handler; when it is
    NULL, None is passed to the bytes object's decode() instead.
    """
    w_bytes = space.newbytes(rffi.constcharpsize2str(v, size))
    w_errors = None
    if errors:
        w_errors = space.newtext(rffi.constcharp2str(errors))
    w_codec = space.newtext('ascii')
    w_text = space.call_method(w_bytes, 'decode', w_codec, w_errors)
    return handles.new(w_text)

@API.func("HPy HPyUnicode_DecodeLatin1(HPyContext *ctx, const char *v, ssize_t size, const char * errors)")
def HPyUnicode_DecodeLatin1(space, handles, ctx, v, size, errors):
    """Decode `size` chars starting at `v` with the 'latin1' codec.

    `errors` is an optional C string naming the error handler; when it is
    NULL, None is passed to the bytes object's decode() instead.
    """
    w_bytes = space.newbytes(rffi.constcharpsize2str(v, size))
    w_errors = None
    if errors:
        w_errors = space.newtext(rffi.constcharp2str(errors))
    w_codec = space.newtext('latin1')
    w_text = space.call_method(w_bytes, 'decode', w_codec, w_errors)
    return handles.new(w_text)

# XXX fixme: should return HPy_UCS4, not long
@API.func("long HPyUnicode_ReadChar(HPyContext *ctx, HPy h, HPy_ssize_t index)",
          error_value=rffi.cast(rffi.LONG, -1))
def HPyUnicode_ReadChar(space, handles, ctx, h, index):
    """Return the ordinal of the character at position `index` of the
    object behind `h`, cast to a C long.

    Raises an app-level IndexError ("string index out of range") when
    `index` is negative or not smaller than the object's length.
    """
    w_unicode = handles.deref(h)
    # Valid positions are 0 .. len-1.  Negative values are rejected
    # explicitly so that they are never interpreted as "count from the
    # end" by getitem; index == len is out of range as well.
    if index < 0 or index >= space.len_w(w_unicode):
        raise oefmt(space.w_IndexError, "string index out of range")
    w_ch = space.getitem(w_unicode, space.newint(index))
    return rffi.cast(rffi.LONG, space.int_w(space.ord(w_ch)))