File: unichar.py

package info (click to toggle)
pypy3 7.3.19%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 212,236 kB
  • sloc: python: 2,098,316; ansic: 540,565; sh: 21,462; asm: 14,419; cpp: 4,451; makefile: 4,209; objc: 761; xml: 530; exp: 499; javascript: 314; pascal: 244; lisp: 45; csh: 12; awk: 4
file content (56 lines) | stat: -rw-r--r-- 1,743 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""
Helpers to pack and unpack a unicode character into raw bytes.
"""

import sys

# Number of bytes one packed unicode character occupies.  The helpers below
# special-case the value 2 and otherwise treat a character as 4 bytes.
UNICODE_SIZE = 4
# True when the interpreter running this module reports a big-endian host
# (most significant byte first); selects the byte order used when packing
# and unpacking.
BIGENDIAN = sys.byteorder == "big"

def pack_unichar(unich, buf, pos):
    """Write the single character `unich` into `buf` starting at `pos`.

    The character is converted to its integer code point with ord() and
    the actual byte layout is delegated to pack_codepoint().
    """
    codepoint = ord(unich)
    pack_codepoint(codepoint, buf, pos)

def pack_codepoint(unich, buf, pos):
    """Write the integer code point `unich` into `buf` as raw bytes.

    `buf` must provide a ``setitem(index, char)`` method; one single-byte
    character is stored per index, beginning at `pos`.  UNICODE_SIZE picks
    the width (2 bytes, otherwise 4) and BIGENDIAN picks the byte order.
    Nothing is returned.
    """
    if UNICODE_SIZE == 2:
        # 16-bit layout: the upper part is deliberately left unmasked, so a
        # code point that does not fit in 16 bits is handed to chr() as-is.
        upper = unich >> 8
        lower = unich & 0xFF
        if BIGENDIAN:
            buf.setitem(pos, chr(upper))
            buf.setitem(pos + 1, chr(lower))
        else:
            buf.setitem(pos, chr(lower))
            buf.setitem(pos + 1, chr(upper))
        return

    # 32-bit layout: split the value into four bytes, least significant
    # first.  The top byte is not masked, mirroring the 16-bit case.
    byte0 = unich & 0xFF
    byte1 = (unich >> 8) & 0xFF
    byte2 = (unich >> 16) & 0xFF
    byte3 = unich >> 24
    if BIGENDIAN:
        buf.setitem(pos, chr(byte3))
        buf.setitem(pos + 1, chr(byte2))
        buf.setitem(pos + 2, chr(byte1))
        buf.setitem(pos + 3, chr(byte0))
    else:
        buf.setitem(pos, chr(byte0))
        buf.setitem(pos + 1, chr(byte1))
        buf.setitem(pos + 2, chr(byte2))
        buf.setitem(pos + 3, chr(byte3))

def unpack_codepoint(rawstring):
    """Decode the raw bytes in `rawstring` into an integer code point.

    `rawstring` must contain exactly UNICODE_SIZE single-byte characters
    (checked with an assert).  The bytes are combined according to
    BIGENDIAN, i.e. the inverse of what pack_codepoint() writes.
    """
    assert len(rawstring) == UNICODE_SIZE
    if UNICODE_SIZE == 2:
        first = ord(rawstring[0])
        second = ord(rawstring[1])
        if BIGENDIAN:
            return (first << 8) | second
        return first | (second << 8)

    # Four-byte form: read the bytes in index order, then assemble them.
    first = ord(rawstring[0])
    second = ord(rawstring[1])
    third = ord(rawstring[2])
    fourth = ord(rawstring[3])
    if BIGENDIAN:
        return (first << 24) | (second << 16) | (third << 8) | fourth
    return first | (second << 8) | (third << 16) | (fourth << 24)

def unpack_unichar(rawstring):
    """Decode the raw bytes in `rawstring` into a one-character unicode string.

    The integer code point comes from unpack_codepoint() and is converted
    with the `unichr` builtin (a Python 2 / RPython builtin).
    """
    codepoint = unpack_codepoint(rawstring)
    return unichr(codepoint)