# Fuzz test for text codecs: encodes and decodes random strings of
# increasing length with every codec listed in all_encodings and prints
# any input that triggers an unexpected exception.
import random, sys
# Seed the shared generator with a fixed value so that every run draws
# the same sequence of random inputs and a reported failure can be replayed.
random.seed(42)
def make_random_encoded_string(length=10, variance=1):
    """Return a random byte string, optionally prefixed with a UTF-16 BOM.

    The payload holds ``length`` random bytes plus a random offset drawn
    from ``[-variance, variance)``; a negative total yields an empty
    payload. One of the two UTF-16 byte order marks, or nothing, is
    prepended, so the result may be two bytes longer than the payload.

    Args:
        length: Nominal number of random payload bytes.
        variance: Half-width of the random length offset; ``0`` disables
            the offset.

    Returns:
        A byte string (``str`` on Python 2, ``bytes`` on Python 3).

    Raises:
        ValueError: If ``variance`` is negative (empty ``randrange``).
    """
    # Draw order (BOM, length offset, payload bytes) is significant: it
    # keeps the sequence produced by a seeded generator reproducible.
    bom = random.choice([b"\xff\xfe", b"\xfe\xff", b""])
    # randrange(0, 0) is an empty range and raises, so skip it for 0.
    offset = random.randrange(-variance, variance) if variance else 0
    payload = bytearray(random.randrange(256) for _ in range(length + offset))
    return bom + bytes(payload)
def make_random_unicode(length=10, variance=1):
    """Return a text string made of random code points.

    The string holds ``length`` characters plus a random offset drawn
    from ``[-variance, variance)``; a negative total yields an empty
    string. Code points are drawn uniformly from ``0`` up to and
    including ``sys.maxunicode``, so surrogates and unassigned code
    points can occur.

    Args:
        length: Nominal number of characters.
        variance: Half-width of the random length offset; ``0`` disables
            the offset.

    Returns:
        A text string (``unicode`` on Python 2, ``str`` on Python 3),
        also when the result is empty.

    Raises:
        ValueError: If ``variance`` is negative (empty ``randrange``).
    """
    try:
        to_char = unichr  # Python 2: chr() only covers single bytes
    except NameError:
        to_char = chr  # Python 3: chr() already returns text
    # randrange(0, 0) is an empty range and raises, so skip it for 0.
    offset = random.randrange(-variance, variance) if variance else 0
    # "+ 1" because randrange excludes its upper bound and the highest
    # code point should be reachable too.
    limit = sys.maxunicode + 1
    chars = [to_char(random.randrange(limit)) for _ in range(length + offset)]
    # A text-typed separator keeps the empty result a text string as well.
    return u"".join(chars)
def check_encode(encoding, s):
    """Encode ``s`` with ``encoding`` under each error-handling mode.

    Strict encoding is attempted first and a ``UnicodeError`` from it is
    tolerated, since random text need not be representable. The
    ``"ignore"`` and ``"replace"`` handlers are then run unguarded: any
    exception they raise propagates to the caller. Results are discarded.

    Args:
        encoding: Name of the codec to exercise.
        s: Text string to encode.
    """
    try:
        s.encode(encoding)
    except UnicodeError:
        pass  # strict mode is allowed to reject the input
    for error_mode in ("ignore", "replace"):
        s.encode(encoding, error_mode)
def check_decode(encoding, s):
    """Decode ``s`` with ``encoding`` under each error-handling mode.

    Strict decoding is attempted first and a ``UnicodeError`` from it is
    tolerated, since random bytes need not be valid input. The
    ``"ignore"`` and ``"replace"`` handlers are then run unguarded: any
    exception they raise propagates to the caller. Results are discarded.

    Args:
        encoding: Name of the codec to exercise.
        s: Byte string to decode.
    """
    try:
        s.decode(encoding)
    except UnicodeError:
        pass  # strict mode is allowed to reject the input
    for error_mode in ("ignore", "replace"):
        s.decode(encoding, error_mode)
def check_with_length(length):
    """Run one fuzzing round for every codec in ``all_encodings``.

    A random byte string of roughly ``length`` bytes is decoded with
    each codec, then a random text string of roughly ``length``
    characters is encoded with each codec. If a check raises, the
    offending codec and input are printed and the remaining codecs of
    that phase are skipped; the exception is not re-raised.

    Args:
        length: Nominal size of the generated inputs (the generators are
            called with a variance of 10).
    """
    # Inputs are generated outside the ``try`` blocks so the handlers
    # never touch an unbound ``s``/``encoding`` and a generator failure
    # surfaces as itself instead of being masked by a NameError.
    s = make_random_encoded_string(length, 10)
    try:
        for encoding in all_encodings:
            check_decode(encoding, s)
    except Exception:
        # Deliberately broad: any crash inside a codec is a finding.
        print("decoding: %s %r" % (encoding, s))

    s = make_random_unicode(length, 10)
    try:
        for encoding in all_encodings:
            check_encode(encoding, s)
    except Exception:
        # Deliberately broad: any crash inside a codec is a finding.
        print("encoding: %s %r" % (encoding, s))
def main():
    """Fuzz the codecs with inputs of steadily increasing length.

    Lengths run from 0 to 990 in steps of 10, then from 1000 to 999000
    in steps of 1000. Each length is printed as a progress marker and
    then checked 100 times with fresh random inputs.
    """
    length_ranges = (
        range(0, 1000, 10),          # fine-grained sweep of short inputs
        range(1000, 1000000, 1000),  # coarse sweep of long inputs
    )
    for lengths in length_ranges:
        for length in lengths:
            print(length)
            for _ in range(100):
                check_with_length(length)
# Codecs exercised in every fuzzing round, in the order they are tried.
all_encodings = "utf-8 latin1 ascii utf-16 utf-16-be utf-16-le utf-7".split()

# Start fuzzing only when executed as a script, not when imported.
if __name__ == "__main__":
    main()