File: unicode.py

package info (click to toggle)
giac 1.6.0.41%2Bdfsg1-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 64,540 kB
  • sloc: cpp: 351,842; ansic: 105,138; python: 30,545; javascript: 8,675; yacc: 2,690; lex: 2,449; makefile: 1,243; sh: 579; perl: 314; lisp: 216; asm: 62; java: 41; sed: 16; csh: 7; pascal: 6
file content (53 lines) | stat: -rw-r--r-- 1,352 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Index into a string literal that contains a non-ASCII character (the
# copyright sign, U+00A9) written directly in this UTF-8 source file.
# Positions are walked with range(len(...)) on purpose: per-character
# indexing is the operation being exercised here.
s = "asdf©qwer"
for pos in range(len(s)):
    char = s[pos]
    print("s[%d]: %s   %X" % (pos, char, ord(char)))

# Build a string with all three escape spellings (\xNN, \uNNNN and
# \UNNNNNNNN). The chosen code points need 1, 2, 3 and 4 bytes in UTF-8,
# so every byte-length pattern is represented.
s = "a\xA9\xFF\u0123\u0800\uFFEE\U0001F44C"
length = len(s)
# Visit every valid index, negative ones included, and show the character
# at that index plus the prefix, bounded and suffix slices anchored there.
for start in range(-length, length):
    char = s[start]
    print("s[%d]: %s   %X" % (start, char, ord(char)))
    head = s[:start]
    print("s[:%d]: %d chars, '%s'" % (start, len(head), head))
    for stop in range(start, length):
        piece = s[start:stop]
        print("s[%d:%d]: %d chars, '%s'" % (start, stop, len(piece), piece))
    tail = s[start:]
    print("s[%d:]: %d chars, '%s'" % (start, len(tail), tail))

# Round-trip the current string `s` through its UTF-8 byte form: print the
# encoded bytes and whether decoding them yields the original string again.
enc = s.encode()
round_trip_ok = enc.decode() == s
print(enc, round_trip_ok)

# Show repr() of strings holding a high code point: one at the top of the
# Basic Multilingual Plane (U+FFFF) and one beyond it (U+1FFFF).
# NOTE: per the original author, for some characters (e.g. \u10ff) the
# implementation under test differs from CPython.
for sample in ('a\uffff', 'a\U0001ffff'):
    print(repr(sample))

# An escape naming a code point above U+10FFFF must be rejected. The literal
# is passed to eval() as text so the error surfaces while this script runs
# instead of preventing the script itself from being compiled.
bad_literal = '"\\U00110000"'
try:
    eval(bad_literal)
except SyntaxError:
    print('SyntaxError')

# int() must refuse a string whose only character is a non-ASCII,
# non-digit code point (U+0200).
non_digit = '\u0200'
try:
    int(non_digit)
except ValueError:
    print('ValueError')

# Decoding malformed UTF-8 to str must raise UnicodeError. Each input below
# is tried independently; one line is printed per rejected input.
invalid_inputs = (
    b'ab\xa1',              # continuation byte with no lead byte before it
    b'ab\xf8',              # 0xF8 is not a valid UTF-8 lead byte
    bytearray(b'ab\xc0a'),  # 0xC0 followed by ASCII, given as a bytearray
    b'\xf0\xe0\xed\xe8',    # lead bytes only, no continuation bytes
)
for raw in invalid_inputs:
    try:
        str(raw, 'utf8')
    except UnicodeError:
        print('UnicodeError')