1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
|
"""This module provides backwards compatibility with the older wstring module.
It is intended for use by 4Suite only; do not use it in your own code."""
import string
import utf8_iso
# Translation table for string.translate: maps "_" to "-" and ":" to " ".
_trans = string.maketrans("_:","- ")
def _normalize(codeset):
codeset = string.lower(codeset)
codeset = string.translate(codeset, _trans)
return codeset
class _Wstringmod:
    """Emulator for old wstring module.

    Instance attributes:
      aliases   -- maps an alternative encoding name to its canonical name
      encodings -- maps a canonical encoding name to its codeset number
    """

    def __init__(self):
        # Every built-in alias points at the same canonical name.
        latin1 = 'iso-8859-1'
        self.aliases = {}
        for alias in ('iso-ir-100', 'cp819', 'l1', 'latin1', 'ibm819'):
            self.aliases[alias] = latin1

        # Codeset 0 is UTF-8; 'iso-8859-N' is registered as codeset N for
        # every index N (from 1 up) with a true entry in
        # utf8_iso.code_to_uni.
        self.encodings = {'utf-8': 0}
        for number in range(1, len(utf8_iso.code_to_uni)):
            if not utf8_iso.code_to_uni[number]:
                continue
            self.encodings['iso-8859-%d' % number] = number

    def install_alias(self, newname, oldname):
        """Register *newname* as an alias for *oldname*.

        Both names are stored in normalized form.
        """
        target = _normalize(oldname)
        alias = _normalize(newname)
        self.aliases[alias] = target

    def from_utf8(self, utf8):
        """Wrap the string *utf8* in a UTF8String with the default encoding."""
        wrapped = UTF8String(utf8)
        return wrapped

    def decode(self, encoding, string):
        """Wrap *string*, declared to be in *encoding*, in a UTF8String."""
        wrapped = UTF8String(string, encoding)
        return wrapped

    def chr(self, ch):
        """Return a UTF8String built from utf8_iso.utf8chr(ch)."""
        encoded = utf8_iso.utf8chr(ch)
        return UTF8String(encoded)
# Single shared instance that stands in for the old wstring module.
wstring = _Wstringmod()
class UTF8String:
    """Emulator for the wstring type.

    An instance keeps the string it was given in ``data`` and the codeset
    number of that string's encoding in ``codeset``.  Codeset 0 means the
    data is already UTF-8; any other value is the number that
    ``wstring.encodings`` associates with the encoding name.
    """

    def __init__(self, string, encoding='utf-8'):
        """Wrap *string*, which is encoded in *encoding*.

        Raises utf8_iso.ConvertError if *encoding* is neither a known
        encoding name nor a registered alias.
        """
        self.data = string
        self.codeset = self._lookup_codeset(encoding)

    def _lookup_codeset(self, encoding):
        """Return the codeset number for *encoding*, following aliases.

        Raises utf8_iso.ConvertError when the name cannot be resolved.
        """
        enc = _normalize(encoding)
        codeset = wstring.encodings.get(enc)
        if codeset is None and enc in wstring.aliases:
            # An alias names a canonical encoding, which lives in the same
            # ``encodings`` table (not in a separate ``encoding`` attribute).
            codeset = wstring.encodings.get(wstring.aliases[enc])
        if codeset is None:
            raise utf8_iso.ConvertError('Unknown encoding: %s' % encoding)
        return codeset

    def utf8(self):
        """Return the contents as a UTF-8 string.

        UTF-8 data is returned unchanged; otherwise each character of
        ``data`` is passed through utf8_iso.code_to_utf8 and the results
        are concatenated.
        """
        if self.codeset == 0:
            return self.data
        codeset = self.codeset
        pieces = [utf8_iso.code_to_utf8(codeset, char) for char in self.data]
        return ''.join(pieces)

    def encode(self, encoding):
        """Return the contents converted to *encoding*.

        Raises utf8_iso.ConvertError if *encoding* is unknown.
        """
        codeset = self._lookup_codeset(encoding)
        # Always start from the UTF-8 form: ``data`` itself is only UTF-8
        # when this instance's own codeset is 0, and the scan below hands
        # its input to utf8_iso.utf8_to_code.
        remaining = self.utf8()
        if codeset == 0:
            return remaining
        output = []
        while remaining:
            # Find the first byte above the ASCII range.  When there is
            # none, i ends at the last index, so the final character is
            # left for the next pass and goes through utf8_to_code.
            # NOTE(review): byte value 128 itself is treated as plain
            # ASCII here; ">= 128" may have been intended -- confirm
            # against utf8_iso.utf8_to_code before changing.
            for i in range(len(remaining)):
                if ord(remaining[i]) > 128:
                    break
            if i == 0:
                # Convert one character and continue with what is left.
                char, remaining = utf8_iso.utf8_to_code(codeset, remaining)
                output.append(char)
            else:
                # Copy the leading run of ASCII characters through as-is.
                output.extend(list(remaining[:i]))
                remaining = remaining[i:]
        return ''.join(output)
|