File: unicode.py

package info (click to toggle)

micropython 1.25.0%2Bds-1

links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 48,944 kB
sloc: ansic: 317,850; python: 59,539; xml: 4,241; makefile: 3,530; sh: 1,421; javascript: 744; asm: 681; cpp: 45; exp: 11; pascal: 6

file content (53 lines) | stat: -rw-r--r-- 1,362 bytes

# Test a UTF-8 encoded literal
# The literal has one non-ASCII character (the copyright sign) between two
# runs of ASCII.  Every position is read with s[i], so the printed character
# and its hexadecimal code point depend on indexing by character position.
# NOTE(review): the index-based loop looks deliberate (indexing is what is
# being exercised) -- do not convert it to direct iteration.
s = "asdf©qwer"
for i in range(len(s)):
    print("s[%d]: %s   %X" % (i, s[i], ord(s[i])))

# Test all three forms of Unicode escape, and
# all blocks of UTF-8 byte patterns
# The literal mixes \x, \u and \U escapes.  Its code points (0x61, 0xA9,
# 0xFF, 0x123, 0x800, 0xFFEE, 0x1F44C) take one, two, three and four bytes
# when encoded as UTF-8.
s = "a\xa9\xff\u0123\u0800\uffee\U0001f44c"
# i runs from -len(s) to len(s) - 1, so each position is visited through
# both its negative and its non-negative index.
for i in range(-len(s), len(s)):
    # Single character at index i and its code point in hex.
    print("s[%d]: %s   %X" % (i, s[i], ord(s[i])))
    # Prefix slice that stops at i.
    print("s[:%d]: %d chars, '%s'" % (i, len(s[:i]), s[:i]))
    # Every slice starting at i; j starts at i, so the first one is the
    # empty slice s[i:i].
    for j in range(i, len(s)):
        print("s[%d:%d]: %d chars, '%s'" % (i, j, len(s[i:j]), s[i:j]))
    # Suffix slice that starts at i.
    print("s[%d:]: %d chars, '%s'" % (i, len(s[i:]), s[i:]))

# Test UTF-8 encode and decode
# s is the escape-built string assigned earlier in this script.  encode() and
# decode() are both called without arguments; the line printed is the encoded
# bytes followed by whether decoding them gives back a string equal to s.
enc = s.encode()
print(enc, enc.decode() == s)

# printing of unicode chars using repr
# NOTE: for some characters (eg \u10ff) we differ from CPython
# The two cases below follow an ASCII letter with U+FFFF (three bytes in
# UTF-8) and with U+1FFFF (four bytes in UTF-8).
print(repr("a\uffff"))
print(repr("a\U0001ffff"))

# test invalid escape code
# \U00110000 is one past U+10FFFF, the largest valid code point.  The doubled
# backslash keeps the escape unprocessed in this file, so it is eval() that
# parses the string literal and raises the SyntaxError, which can then be
# caught here.  The argument is a fixed literal, not external input.
try:
    eval('"\\U00110000"')
except SyntaxError:
    print("SyntaxError")

# test unicode string given to int
# U+0200 is a letter rather than a digit, so the conversion is expected to
# fail with ValueError; nothing is printed if int() accepts it.
try:
    int("\u0200")
except ValueError:
    print("ValueError")

# test invalid UTF-8 string
# Each entry is a byte sequence that is not well-formed UTF-8.  Decoding it
# with str(..., "utf8") is expected to raise UnicodeError, which prints one
# line per entry; an entry that decodes without error prints nothing.
for raw in (
    b"ab\xa1",  # continuation byte with no lead byte before it
    b"ab\xf8",  # 0xf8 is not a valid lead byte
    bytearray(b"ab\xc0a"),  # 0xc0 lead followed by ASCII, given as bytearray
    b"\xf0\xe0\xed\xe8",  # lead bytes where continuation bytes are required
):
    try:
        str(raw, "utf8")
    except UnicodeError:
        print("UnicodeError")